Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r73469 - in trunk: boost/locale libs/locale/src/encoding libs/locale/src/util libs/locale/test
From: artyomtnk_at_[hidden]
Date: 2011-08-01 04:04:30


Author: artyom
Date: 2011-08-01 04:04:27 EDT (Mon, 01 Aug 2011)
New Revision: 73469
URL: http://svn.boost.org/trac/boost/changeset/73469

Log:
utf_to_utf is now inline and implemented in headers

Added:
   trunk/boost/locale/definitions.hpp (contents, props changed)
   trunk/boost/locale/encoding_errors.hpp (contents, props changed)
   trunk/boost/locale/encoding_utf.hpp (contents, props changed)
   trunk/boost/locale/utf.hpp (contents, props changed)
   trunk/boost/locale/utf_encoding.hpp (contents, props changed)
   trunk/libs/locale/test/test_utf.cpp (contents, props changed)
Text files modified:
   trunk/boost/locale/config.hpp | 22 -----
   trunk/boost/locale/encoding.hpp | 110 ----------------------------
   trunk/boost/locale/util.hpp | 5
   trunk/libs/locale/src/encoding/wconv_codepage.ipp | 68 ++---------------
   trunk/libs/locale/src/util/codecvt_converter.cpp | 149 ++++-----------------------------------
   trunk/libs/locale/test/Jamfile.v2 | 1
   6 files changed, 35 insertions(+), 320 deletions(-)

Modified: trunk/boost/locale/config.hpp
==============================================================================
--- trunk/boost/locale/config.hpp (original)
+++ trunk/boost/locale/config.hpp 2011-08-01 04:04:27 EDT (Mon, 01 Aug 2011)
@@ -8,26 +8,7 @@
 #ifndef BOOST_LOCALE_CONFIG_HPP_INCLUDED
 #define BOOST_LOCALE_CONFIG_HPP_INCLUDED
 
-#include <boost/config.hpp>
-
-// Support older ICU versions
-#ifndef BOOST_SYMBOL_VISIBLE
-# define BOOST_SYMBOL_VISIBLE
-#endif
-
-#ifdef BOOST_HAS_DECLSPEC
-# if defined(BOOST_ALL_DYN_LINK) || defined(BOOST_LOCALE_DYN_LINK)
-# ifdef BOOST_LOCALE_SOURCE
-# define BOOST_LOCALE_DECL BOOST_SYMBOL_EXPORT
-# else
-# define BOOST_LOCALE_DECL BOOST_SYMBOL_IMPORT
-# endif // BOOST_LOCALE_SOURCE
-# endif // DYN_LINK
-#endif // BOOST_HAS_DECLSPEC
-
-#ifndef BOOST_LOCALE_DECL
-# define BOOST_LOCALE_DECL
-#endif
+#include <boost/locale/definitions.hpp>
 
 //
 // Automatically link to the correct build variant where possible.
@@ -51,7 +32,6 @@
 #endif // auto-linking disabled
 
 
-
 #endif // boost/locale/config.hpp
 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
 

Added: trunk/boost/locale/definitions.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/locale/definitions.hpp 2011-08-01 04:04:27 EDT (Mon, 01 Aug 2011)
@@ -0,0 +1,34 @@
+//
+// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
+//
+// Distributed under the Boost Software License, Version 1.0. (See
+// accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+#ifndef BOOST_LOCALE_DEFINITIONS_HPP_INCLUDED
+#define BOOST_LOCALE_DEFINITIONS_HPP_INCLUDED
+
+#include <boost/config.hpp>
+
+// Support older ICU versions
+#ifndef BOOST_SYMBOL_VISIBLE
+# define BOOST_SYMBOL_VISIBLE
+#endif
+
+#ifdef BOOST_HAS_DECLSPEC
+# if defined(BOOST_ALL_DYN_LINK) || defined(BOOST_LOCALE_DYN_LINK)
+# ifdef BOOST_LOCALE_SOURCE
+# define BOOST_LOCALE_DECL BOOST_SYMBOL_EXPORT
+# else
+# define BOOST_LOCALE_DECL BOOST_SYMBOL_IMPORT
+# endif // BOOST_LOCALE_SOURCE
+# endif // DYN_LINK
+#endif // BOOST_HAS_DECLSPEC
+
+#ifndef BOOST_LOCALE_DECL
+# define BOOST_LOCALE_DECL
+#endif
+
+#endif // boost/locale/definitions.hpp
+// vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+

Modified: trunk/boost/locale/encoding.hpp
==============================================================================
--- trunk/boost/locale/encoding.hpp (original)
+++ trunk/boost/locale/encoding.hpp 2011-08-01 04:04:27 EDT (Mon, 01 Aug 2011)
@@ -14,8 +14,8 @@
 # pragma warning(disable : 4275 4251 4231 4660)
 #endif
 #include <boost/locale/info.hpp>
-#include <boost/cstdint.hpp>
-#include <stdexcept>
+#include <boost/locale/encoding_errors.hpp>
+#include <boost/locale/encoding_utf.hpp>
 
 
 
@@ -32,38 +32,6 @@
             /// @{
 
             ///
- /// \brief The excepton that is thrown in case of conversion error
- ///
- class BOOST_SYMBOL_VISIBLE conversion_error : public std::runtime_error {
- public:
- conversion_error() : std::runtime_error("Conversion failed") {}
- };
-
- ///
- /// \brief This exception is thrown in case of use of unsupported
- /// or invalid character set
- ///
- class BOOST_SYMBOL_VISIBLE invalid_charset_error : public std::runtime_error {
- public:
-
- /// Create an error for charset \a charset
- invalid_charset_error(std::string charset) :
- std::runtime_error("Invalid or unsupported charset:" + charset)
- {
- }
- };
-
-
- ///
- /// enum that defines conversion policy
- ///
- typedef enum {
- skip = 0, ///< Skip illegal/unconvertable characters
- stop = 1, ///< Stop conversion and throw conversion_error
- default_method = skip ///< Default method - skip
- } method_type;
-
- ///
             /// convert string to UTF string from text in range [begin,end) encoded with \a charset according to policy \a how
             ///
             template<typename CharType>
@@ -260,80 +228,6 @@
             BOOST_LOCALE_DECL std::string from_utf(char32_t const *begin,char32_t const *end,std::string const &charset,method_type how);
             #endif
 
- namespace details {
-
- template<typename CharOut,typename CharIn>
- struct utf_to_utf_traits {
- static std::basic_string<CharOut>
- convert(CharIn const *begin,CharIn const *end,method_type how)
- {
- // Make more efficient in fututre - UTF-16/UTF-32 should be quite
- // simple and fast
- return to_utf<CharOut>(from_utf(begin,end,"UTF-8",how),"UTF-8",how);
- }
- };
- template<typename CharOut>
- struct utf_to_utf_traits<CharOut,char> {
- static std::basic_string<CharOut>
- convert(char const *begin,char const *end,method_type how)
- {
- return to_utf<CharOut>(begin,end,"UTF-8",how);
- }
- };
- template<typename CharIn>
- struct utf_to_utf_traits<char,CharIn> {
- static std::string
- convert(CharIn const *begin,CharIn const *end,method_type how)
- {
- return from_utf(begin,end,"UTF-8",how);
- }
- };
- template<>
- struct utf_to_utf_traits<char,char> { // just test valid
- static std::string
- convert(char const *begin,char const *end,method_type how)
- {
- return from_utf(begin,end,"UTF-8",how);
- }
- };
- }
-
- /// \endcond
-
- ///
- /// Convert a Unicode text in range [begin,end) to other Unicode encoding
- ///
- template<typename CharOut,typename CharIn>
- std::basic_string<CharOut>
- utf_to_utf(CharIn const *begin,CharIn const *end,method_type how = default_method)
- {
- return details::utf_to_utf_traits<CharOut,CharIn>::convert(begin,end,how);
- }
-
- ///
- /// Convert a Unicode NUL terminated string \a str other Unicode encoding
- ///
- template<typename CharOut,typename CharIn>
- std::basic_string<CharOut>
- utf_to_utf(CharIn const *str,method_type how = default_method)
- {
- CharIn const *end = str;
- while(*end)
- end++;
- return utf_to_utf<CharOut,CharIn>(str,end,how);
- }
-
-
- ///
- /// Convert a Unicode string \a str other Unicode encoding
- ///
- template<typename CharOut,typename CharIn>
- std::basic_string<CharOut>
- utf_to_utf(std::basic_string<CharIn> const &str,method_type how = default_method)
- {
- return utf_to_utf<CharOut,CharIn>(str.c_str(),str.c_str()+str.size(),how);
- }
-
 
             /// @}
 

Added: trunk/boost/locale/encoding_errors.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/locale/encoding_errors.hpp 2011-08-01 04:04:27 EDT (Mon, 01 Aug 2011)
@@ -0,0 +1,75 @@
+//
+// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
+//
+// Distributed under the Boost Software License, Version 1.0. (See
+// accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+#ifndef BOOST_LOCALE_ENCODING_ERRORS_HPP_INCLUDED
+#define BOOST_LOCALE_ENCODING_ERRORS_HPP_INCLUDED
+
+#include <boost/locale/definitions.hpp>
+#ifdef BOOST_MSVC
+# pragma warning(push)
+# pragma warning(disable : 4275 4251 4231 4660)
+#endif
+#include <stdexcept>
+
+
+
+namespace boost {
+ namespace locale {
+ namespace conv {
+ ///
+ /// \addtogroup codepage
+ ///
+ /// @{
+
+ ///
+ /// \brief The exception that is thrown in case of conversion error
+ ///
+ /// The exception always carries the fixed message "Conversion failed".
+ ///
+ class BOOST_SYMBOL_VISIBLE conversion_error : public std::runtime_error {
+ public:
+ /// Create the error with the message "Conversion failed"
+ conversion_error() : std::runtime_error("Conversion failed") {}
+ };
+
+ ///
+ /// \brief This exception is thrown in case of use of unsupported
+ /// or invalid character set
+ ///
+ class BOOST_SYMBOL_VISIBLE invalid_charset_error : public std::runtime_error {
+ public:
+
+ /// Create an error for charset \a charset; the charset name is appended
+ /// to the message prefix "Invalid or unsupported charset:"
+ invalid_charset_error(std::string charset) :
+ std::runtime_error("Invalid or unsupported charset:" + charset)
+ {
+ }
+ };
+
+
+ ///
+ /// Enum that defines the conversion policy, i.e. what a conversion
+ /// function does with input it cannot convert
+ ///
+ typedef enum {
+ skip = 0, ///< Skip illegal/unconvertible characters
+ stop = 1, ///< Stop conversion and throw conversion_error
+ default_method = skip ///< Default method - skip
+ } method_type;
+
+
+ /// @}
+
+ } // conv
+
+ } // locale
+} // boost
+
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
+
+#endif
+
+// vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+

Added: trunk/boost/locale/encoding_utf.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/locale/encoding_utf.hpp 2011-08-01 04:04:27 EDT (Mon, 01 Aug 2011)
@@ -0,0 +1,92 @@
+//
+// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
+//
+// Distributed under the Boost Software License, Version 1.0. (See
+// accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+#ifndef BOOST_LOCALE_ENCODING_UTF_HPP_INCLUDED
+#define BOOST_LOCALE_ENCODING_UTF_HPP_INCLUDED
+
+#include <boost/locale/utf.hpp>
+#include <boost/locale/encoding_errors.hpp>
+#include <iterator>
+#ifdef BOOST_MSVC
+# pragma warning(push)
+# pragma warning(disable : 4275 4251 4231 4660)
+#endif
+
+
+
+namespace boost {
+ namespace locale {
+ namespace conv {
+ ///
+ /// \addtogroup codepage
+ ///
+ /// @{
+
+ ///
+ /// Convert a Unicode text in range [begin,end) to other Unicode encoding
+ ///
+ /// The source and target encodings are selected through utf::utf_traits
+ /// for \a CharIn and \a CharOut respectively.
+ ///
+ /// \param begin pointer to the first code unit of the input
+ /// \param end pointer one past the last code unit of the input
+ /// \param how policy for illegal or incomplete sequences: with \c stop a
+ ///        conversion_error is thrown, otherwise the faulty sequence is dropped
+ /// \return the converted text
+ ///
+ template<typename CharOut,typename CharIn>
+ std::basic_string<CharOut>
+ utf_to_utf(CharIn const *begin,CharIn const *end,method_type how = default_method)
+ {
+     typedef utf::utf_traits<CharIn> in_traits;
+     typedef utf::utf_traits<CharOut> out_traits;
+
+     std::basic_string<CharOut> result;
+     result.reserve(end-begin);
+     std::back_insert_iterator<std::basic_string<CharOut> > inserter(result);
+     while(begin!=end) {
+         // decode()/encode() are called without the "template" keyword: the
+         // disambiguator is only valid in front of an explicit template
+         // argument list, and here the iterator type is deduced.
+         utf::code_point const c=in_traits::decode(begin,end);
+         if(c==utf::illegal || c==utf::incomplete) {
+             if(how==stop)
+                 throw conversion_error();
+             // skip policy: decode() already advanced begin, just go on
+         }
+         else {
+             out_traits::encode(c,inserter);
+         }
+     }
+     return result;
+ }
+
+ ///
+ /// Convert a NUL terminated Unicode string \a str to another Unicode encoding
+ ///
+ /// The terminating NUL is located by a linear scan and the work is
+ /// delegated to the [begin,end) overload with the same policy \a how
+ ///
+ template<typename CharOut,typename CharIn>
+ std::basic_string<CharOut>
+ utf_to_utf(CharIn const *str,method_type how = default_method)
+ {
+     CharIn const *terminator = str;
+     for(;*terminator;++terminator) {
+         // advance to the terminating NUL
+     }
+     return utf_to_utf<CharOut,CharIn>(str,terminator,how);
+ }
+
+
+ ///
+ /// Convert the Unicode string \a str to another Unicode encoding
+ ///
+ /// All str.size() code units are passed to the [begin,end) overload
+ /// together with the policy \a how
+ ///
+ template<typename CharOut,typename CharIn>
+ std::basic_string<CharOut>
+ utf_to_utf(std::basic_string<CharIn> const &str,method_type how = default_method)
+ {
+     CharIn const *const first = str.c_str();
+     CharIn const *const last = first + str.size();
+     return utf_to_utf<CharOut,CharIn>(first,last,how);
+ }
+
+
+ /// @}
+
+ } // conv
+
+ } // locale
+} // boost
+
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
+
+#endif
+
+// vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+

Added: trunk/boost/locale/utf.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/locale/utf.hpp 2011-08-01 04:04:27 EDT (Mon, 01 Aug 2011)
@@ -0,0 +1,454 @@
+//
+// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
+//
+// Distributed under the Boost Software License, Version 1.0. (See
+// accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+#ifndef BOOST_LOCALE_UTF_HPP_INCLUDED
+#define BOOST_LOCALE_UTF_HPP_INCLUDED
+
+#include <boost/cstdint.hpp>
+
+namespace boost {
+namespace locale {
+///
+/// \brief Namespace that holds basic operations on UTF encoded sequences
+///
+/// All functions defined in this namespace do not require linking with Boost.Locale library
+///
+namespace utf {
+ /// \cond INTERNAL
+ #ifdef __GNUC__
+ # define BOOST_LOCALE_LIKELY(x) __builtin_expect((x),1)
+ # define BOOST_LOCALE_UNLIKELY(x) __builtin_expect((x),0)
+ #else
+ # define BOOST_LOCALE_LIKELY(x) (x)
+ # define BOOST_LOCALE_UNLIKELY(x) (x)
+ #endif
+ /// \endcond
+
+ ///
+ /// \brief The integral type that can hold a Unicode code point
+ ///
+ typedef uint32_t code_point;
+
+ ///
+ /// \brief Special constant that defines illegal code point
+ ///
+ static const code_point illegal = 0xFFFFFFFFu;
+
+ ///
+ /// \brief Special constant that defines incomplete code point
+ ///
+ static const code_point incomplete = 0xFFFFFFFEu;
+
+ ///
+ /// \brief Check whether \a v is a valid Unicode code point
+ ///
+ /// A value is valid when it is not above 0x10FFFF and does not lie in the
+ /// surrogate range 0xD800-0xDFFF
+ ///
+ inline bool is_valid_codepoint(code_point v)
+ {
+     bool const in_range = v<=0x10FFFF;
+     bool const is_surrogate = 0xD800<=v && v<=0xDFFF;
+     return in_range && !is_surrogate;
+ }
+
+ #ifdef BOOST_LOCALE_DOXYGEN
+ ///
+ /// \brief UTF Traits class - functions to convert UTF sequences to and from Unicode code points
+ ///
+ template<typename CharType,int size=sizeof(CharType)>
+ struct utf_traits {
+     ///
+     /// The type of the character
+     ///
+     typedef CharType char_type;
+     ///
+     /// Read one code point from the range [p,e) and return it.
+     ///
+     /// - If the sequence that was read is incomplete sequence returns \ref incomplete,
+     /// - If illegal sequence detected returns \ref illegal
+     ///
+     /// Requirements
+     ///
+     /// - Iterator is valid input iterator
+     ///
+     /// Postconditions
+     ///
+     /// - p points one past the last consumed code unit
+     ///
+     template<typename Iterator>
+     static code_point decode(Iterator &p,Iterator e);
+
+     ///
+     /// Maximal width of valid sequence in the code units:
+     ///
+     /// - UTF-8 - 4
+     /// - UTF-16 - 2
+     /// - UTF-32 - 1
+     ///
+     static const int max_width;
+     ///
+     /// The width of specific code point in the code units.
+     ///
+     /// Requirement: value is a valid Unicode code point
+     /// Returns value in range [1..max_width]
+     ///
+     static int width(code_point value);
+
+     ///
+     /// Get the size of the trail part of variable length encoded sequence.
+     ///
+     /// Returns -1 if \a c is not a valid lead character
+     ///
+     static int trail_length(char_type c);
+     ///
+     /// Returns true if c is trail code unit, always false for UTF-32
+     ///
+     static bool is_trail(char_type c);
+     ///
+     /// Returns true if c is lead code unit, always true for UTF-32
+     ///
+     static bool is_lead(char_type c);
+
+     ///
+     /// Convert valid Unicode code point \a value to the UTF sequence.
+     ///
+     /// Requirements:
+     ///
+     /// - \a value is valid code point
+     /// - \a out is an output iterator should be able to accept at least width(value) units
+     ///
+     /// Returns the iterator past the last written code unit.
+     ///
+     template<typename Iterator>
+     static Iterator encode(code_point value,Iterator out);
+     ///
+     /// Decodes valid UTF sequence that is pointed by p into code point.
+     ///
+     /// If the sequence is invalid or points to end the behavior is undefined
+     ///
+     template<typename Iterator>
+     static code_point decode_valid(Iterator &p);
+ };
+
+ #else
+
+ template<typename CharType,int size=sizeof(CharType)>
+ struct utf_traits;
+
+ ///
+ /// UTF-8 implementation of utf_traits, selected for code units of size 1
+ ///
+ template<typename CharType>
+ struct utf_traits<CharType,1> {
+
+     typedef CharType char_type;
+
+     /// Number of trail units announced by the lead unit \a ci:
+     /// 0 for values below 128, 1..3 for leads 194..244, and -1 for any
+     /// other value (it cannot start a sequence)
+     static int trail_length(char_type ci)
+     {
+         unsigned char c = ci;
+         if(c < 128)
+             return 0;
+         if(BOOST_LOCALE_UNLIKELY(c < 194))
+             return -1;
+         if(c < 224)
+             return 1;
+         if(c < 240)
+             return 2;
+         if(BOOST_LOCALE_LIKELY(c <=244))
+             return 3;
+         return -1;
+     }
+
+     static const int max_width = 4;
+
+     /// Number of code units encode() writes for \a value (1..4)
+     static int width(code_point value)
+     {
+         if(value <=0x7F) {
+             return 1;
+         }
+         else if(value <=0x7FF) {
+             return 2;
+         }
+         else if(BOOST_LOCALE_LIKELY(value <=0xFFFF)) {
+             return 3;
+         }
+         else {
+             return 4;
+         }
+     }
+
+     /// True when \a ci has the bit pattern 10xxxxxx
+     static bool is_trail(char_type ci)
+     {
+         unsigned char c=ci;
+         return (c & 0xC0)==0x80;
+     }
+
+     /// True when \a ci is not a trail unit
+     static bool is_lead(char_type ci)
+     {
+         return !is_trail(ci);
+     }
+
+     ///
+     /// Read one code point from [p,e).
+     ///
+     /// Returns \c incomplete when the range ends inside a sequence and
+     /// \c illegal for an invalid lead, a unit in trail position that is not
+     /// of the form 10xxxxxx, a surrogate or out of range value, or a
+     /// non-shortest encoding. Every unit that was read, including the one
+     /// that made the sequence illegal, is consumed from \a p.
+     ///
+     template<typename Iterator>
+     static code_point decode(Iterator &p,Iterator e)
+     {
+         if(BOOST_LOCALE_UNLIKELY(p==e))
+             return incomplete;
+
+         unsigned char lead = *p++;
+
+         // First byte is fully validated here
+         int trail_size = trail_length(lead);
+
+         if(BOOST_LOCALE_UNLIKELY(trail_size < 0))
+             return illegal;
+
+         //
+         // Ok as only ASCII may be of size = 0
+         // also optimize for ASCII text
+         //
+         if(trail_size == 0)
+             return lead;
+
+         code_point c = lead & ((1<<(6-trail_size))-1);
+
+         // Read the rest. Each unit must be a real trail unit (10xxxxxx);
+         // without this check a sequence such as C3 41 would be accepted
+         // and decoded to a bogus code point.
+         unsigned char tmp;
+         switch(trail_size) {
+         case 3:
+             if(BOOST_LOCALE_UNLIKELY(p==e))
+                 return incomplete;
+             tmp = *p++;
+             if(BOOST_LOCALE_UNLIKELY((tmp & 0xC0)!=0x80))
+                 return illegal;
+             c = (c << 6) | ( tmp & 0x3F);
+             // fall through
+         case 2:
+             if(BOOST_LOCALE_UNLIKELY(p==e))
+                 return incomplete;
+             tmp = *p++;
+             if(BOOST_LOCALE_UNLIKELY((tmp & 0xC0)!=0x80))
+                 return illegal;
+             c = (c << 6) | ( tmp & 0x3F);
+             // fall through
+         case 1:
+             if(BOOST_LOCALE_UNLIKELY(p==e))
+                 return incomplete;
+             tmp = *p++;
+             if(BOOST_LOCALE_UNLIKELY((tmp & 0xC0)!=0x80))
+                 return illegal;
+             c = (c << 6) | ( tmp & 0x3F);
+         }
+
+         // Check code point validity: no surrogates and
+         // valid range
+         if(BOOST_LOCALE_UNLIKELY(!is_valid_codepoint(c)))
+             return illegal;
+
+         // make sure it is the most compact representation
+         if(BOOST_LOCALE_UNLIKELY(width(c)!=trail_size + 1))
+             return illegal;
+
+         return c;
+
+     }
+
+     ///
+     /// Decode one code point from a sequence that is known to be valid;
+     /// performs no range or validity checks at all
+     ///
+     template<typename Iterator>
+     static code_point decode_valid(Iterator &p)
+     {
+         unsigned char lead = *p++;
+         if(lead < 192)
+             return lead;
+
+         int trail_size;
+
+         if(lead < 224)
+             trail_size = 1;
+         else if(BOOST_LOCALE_LIKELY(lead < 240)) // non-BMP rare
+             trail_size = 2;
+         else
+             trail_size = 3;
+
+         code_point c = lead & ((1<<(6-trail_size))-1);
+
+         switch(trail_size) {
+         case 3:
+             c = (c << 6) | ( static_cast<unsigned char>(*p++) & 0x3F);
+             // fall through
+         case 2:
+             c = (c << 6) | ( static_cast<unsigned char>(*p++) & 0x3F);
+             // fall through
+         case 1:
+             c = (c << 6) | ( static_cast<unsigned char>(*p++) & 0x3F);
+         }
+
+         return c;
+     }
+
+
+
+     ///
+     /// Write \a value as 1 to 4 code units to \a out and return the
+     /// advanced iterator; \a value is not validated here
+     ///
+     template<typename Iterator>
+     static Iterator encode(code_point value,Iterator out)
+     {
+         if(value <=0x7F) {
+             *out++ = value;
+         }
+         else if(value <=0x7FF) {
+             *out++=(value >> 6) | 0xC0;
+             *out++=(value & 0x3F) | 0x80;
+         }
+         else if(BOOST_LOCALE_LIKELY(value <=0xFFFF)) {
+             *out++=(value >> 12) | 0xE0;
+             *out++=((value >> 6) & 0x3F) | 0x80;
+             *out++=(value & 0x3F) | 0x80;
+         }
+         else {
+             *out++=(value >> 18) | 0xF0;
+             *out++=((value >> 12) & 0x3F) | 0x80;
+             *out++=((value >> 6) & 0x3F) | 0x80;
+             *out++=(value & 0x3F) | 0x80;
+         }
+         return out;
+     }
+ }; // utf8
+
+ ///
+ /// UTF-16 implementation of utf_traits, selected for code units of size 2
+ ///
+ template<typename CharType>
+ struct utf_traits<CharType,2> {
+ typedef CharType char_type;
+
+ // See RFC 2781
+ /// True when \a x is in 0xD800-0xDBFF (first unit of a surrogate pair)
+ static bool is_first_surrogate(uint16_t x)
+ {
+ return 0xD800 <=x && x<= 0xDBFF;
+ }
+ /// True when \a x is in 0xDC00-0xDFFF (second unit of a surrogate pair)
+ static bool is_second_surrogate(uint16_t x)
+ {
+ return 0xDC00 <=x && x<= 0xDFFF;
+ }
+ /// Combine the low 10 bits of \a w1 and \a w2 and add 0x10000
+ static code_point combine_surrogate(uint16_t w1,uint16_t w2)
+ {
+ return ((code_point(w1 & 0x3FF) << 10) | (w2 & 0x3FF)) + 0x10000;
+ }
+ /// Returns 1 for a first surrogate, -1 for a second surrogate (it cannot
+ /// start a sequence) and 0 for any other unit
+ static int trail_length(char_type c)
+ {
+ if(is_first_surrogate(c))
+ return 1;
+ if(is_second_surrogate(c))
+ return -1;
+ return 0;
+ }
+ ///
+ /// Returns true if c is trail code unit, i.e. a second surrogate
+ ///
+ static bool is_trail(char_type c)
+ {
+ return is_second_surrogate(c);
+ }
+ ///
+ /// Returns true if c is lead code unit, i.e. anything but a second surrogate
+ ///
+ static bool is_lead(char_type c)
+ {
+ return !is_second_surrogate(c);
+ }
+
+ ///
+ /// Read one code point from [current,last).
+ ///
+ /// Returns \c incomplete when the range is empty or ends after a first
+ /// surrogate, and \c illegal for a second surrogate in lead position or a
+ /// first surrogate that is not followed by a second surrogate.
+ ///
+ template<typename It>
+ static code_point decode(It &current,It last)
+ {
+ if(BOOST_LOCALE_UNLIKELY(current == last))
+ return incomplete;
+ uint16_t w1=*current++;
+ // common case: not a surrogate, the unit is the code point
+ if(BOOST_LOCALE_LIKELY(w1 < 0xD800 || 0xDFFF < w1)) {
+ return w1;
+ }
+ // w1 is a surrogate here; above 0xDBFF it is a second surrogate
+ if(w1 > 0xDBFF)
+ return illegal;
+ if(current==last)
+ return incomplete;
+ uint16_t w2=*current++;
+ if(w2 < 0xDC00 || 0xDFFF < w2)
+ return illegal;
+ return combine_surrogate(w1,w2);
+ }
+ ///
+ /// Decode one code point without any validation: any surrogate unit is
+ /// combined with the unit that follows it
+ ///
+ template<typename It>
+ static code_point decode_valid(It &current)
+ {
+ uint16_t w1=*current++;
+ if(BOOST_LOCALE_LIKELY(w1 < 0xD800 || 0xDFFF < w1)) {
+ return w1;
+ }
+ uint16_t w2=*current++;
+ return combine_surrogate(w1,w2);
+ }
+
+ static const int max_width = 2;
+ /// Returns 2 for code points from 0x10000 up, 1 otherwise
+ static int width(code_point u)
+ {
+ return u>=0x10000 ? 2 : 1;
+ }
+ ///
+ /// Write \a u as one unit, or as a surrogate pair when it is above
+ /// 0xFFFF, and return the advanced iterator; \a u is not validated here
+ ///
+ template<typename It>
+ static It encode(code_point u,It out)
+ {
+ if(BOOST_LOCALE_LIKELY(u<=0xFFFF)) {
+ *out++ = u;
+ }
+ else {
+ u-=0x10000;
+ *out++=0xD800 | (u>>10);
+ *out++=0xDC00 | (u & 0x3FF);
+ }
+ return out;
+ }
+ }; // utf16;
+
+
+ ///
+ /// UTF-32 implementation of utf_traits, selected for code units of size 4;
+ /// every code point occupies exactly one unit
+ ///
+ template<typename CharType>
+ struct utf_traits<CharType,4> {
+ typedef CharType char_type;
+ /// Returns 0 when \a c is a valid code point and -1 otherwise
+ static int trail_length(char_type c)
+ {
+ if(is_valid_codepoint(c))
+ return 0;
+ return -1;
+ }
+ /// Always false: there are no trail units in this encoding
+ static bool is_trail(char_type /*c*/)
+ {
+ return false;
+ }
+ /// Always true: every unit starts a sequence
+ static bool is_lead(char_type /*c*/)
+ {
+ return true;
+ }
+
+ /// Return the next unit as a code point without any validation
+ template<typename It>
+ static code_point decode_valid(It &current)
+ {
+ return *current++;
+ }
+
+ ///
+ /// Read one code point from [current,last): \c incomplete for an empty
+ /// range, \c illegal when the unit fails is_valid_codepoint()
+ ///
+ template<typename It>
+ static code_point decode(It &current,It last)
+ {
+ if(BOOST_LOCALE_UNLIKELY(current == last))
+ return boost::locale::utf::incomplete;
+ code_point c=*current++;
+ if(BOOST_LOCALE_UNLIKELY(!is_valid_codepoint(c)))
+ return boost::locale::utf::illegal;
+ return c;
+ }
+ static const int max_width = 1;
+ /// Always 1
+ static int width(code_point /*u*/)
+ {
+ return 1;
+ }
+ /// Write \a u as a single unit and return the advanced iterator
+ template<typename It>
+ static It encode(code_point u,It out)
+ {
+ *out++ = u;
+ return out;
+ }
+
+ }; // utf32
+
+ #endif
+
+
+} // utf
+} // locale
+} // boost
+
+
+#endif
+
+// vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+

Added: trunk/boost/locale/utf_encoding.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/locale/utf_encoding.hpp 2011-08-01 04:04:27 EDT (Mon, 01 Aug 2011)
@@ -0,0 +1,97 @@
+//
+// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
+//
+// Distributed under the Boost Software License, Version 1.0. (See
+// accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+#ifndef BOOST_LOCALE_ENCODING_HPP_INCLUDED
+#define BOOST_LOCALE_ENCODING_HPP_INCLUDED
+
+#include <boost/locale/utf.hpp>
+#include <boost/locale/encoding_errors.hpp>
+#include <iterator>
+#include <string>
+
+#ifdef BOOST_MSVC
+# pragma warning(push)
+# pragma warning(disable : 4275 4251 4231 4660)
+#endif
+
+
+namespace boost {
+ namespace locale {
+ namespace conv {
+ ///
+ /// \addtogroup codepage Character conversion functions
+ ///
+ /// @{
+
+ ///
+ /// Convert a Unicode text in range [begin,end) to other Unicode encoding
+ ///
+ /// This function does not require linking with Boost.Locale library
+ ///
+ /// The source and target encodings are selected through utf::utf_traits
+ /// for \a CharIn and \a CharOut respectively.
+ ///
+ /// \param begin pointer to the first code unit of the input
+ /// \param end pointer one past the last code unit of the input
+ /// \param how policy for illegal or incomplete sequences: with \c stop a
+ ///        conversion_error is thrown, otherwise the faulty sequence is dropped
+ /// \return the converted text
+ ///
+ template<typename CharOut,typename CharIn>
+ std::basic_string<CharOut>
+ utf_to_utf(CharIn const *begin,CharIn const *end,method_type how = default_method)
+ {
+     typedef utf::utf_traits<CharIn> in_traits;
+     typedef utf::utf_traits<CharOut> out_traits;
+
+     std::basic_string<CharOut> result;
+     result.reserve(end-begin);
+     std::back_insert_iterator<std::basic_string<CharOut> > inserter(result);
+     while(begin!=end) {
+         // decode()/encode() are called without the "template" keyword: the
+         // disambiguator is only valid in front of an explicit template
+         // argument list, and here the iterator type is deduced.
+         utf::code_point const c=in_traits::decode(begin,end);
+         if(c==utf::illegal || c==utf::incomplete) {
+             if(how==stop)
+                 throw conversion_error();
+             // skip policy: decode() already advanced begin, just go on
+         }
+         else {
+             out_traits::encode(c,inserter);
+         }
+     }
+     return result;
+ }
+
+ ///
+ /// Convert a NUL terminated Unicode string \a str to another Unicode encoding
+ ///
+ /// This function does not require linking with Boost.Locale library
+ ///
+ /// The terminating NUL is located by a linear scan and the work is
+ /// delegated to the [begin,end) overload with the same policy \a how
+ ///
+ template<typename CharOut,typename CharIn>
+ std::basic_string<CharOut>
+ utf_to_utf(CharIn const *str,method_type how = default_method)
+ {
+     CharIn const *terminator = str;
+     for(;*terminator;++terminator) {
+         // advance to the terminating NUL
+     }
+     return utf_to_utf<CharOut,CharIn>(str,terminator,how);
+ }
+
+
+ ///
+ /// Convert the Unicode string \a str to another Unicode encoding
+ ///
+ /// This function does not require linking with Boost.Locale library
+ ///
+ /// All str.size() code units are passed to the [begin,end) overload
+ /// together with the policy \a how
+ ///
+ template<typename CharOut,typename CharIn>
+ std::basic_string<CharOut>
+ utf_to_utf(std::basic_string<CharIn> const &str,method_type how = default_method)
+ {
+     CharIn const *const first = str.c_str();
+     CharIn const *const last = first + str.size();
+     return utf_to_utf<CharOut,CharIn>(first,last,how);
+ }
+
+
+ /// @}
+
+ } // conv
+
+ } // locale
+} // boost
+
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
+
+#endif
+
+// vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+

Modified: trunk/boost/locale/util.hpp
==============================================================================
--- trunk/boost/locale/util.hpp (original)
+++ trunk/boost/locale/util.hpp 2011-08-01 04:04:27 EDT (Mon, 01 Aug 2011)
@@ -10,6 +10,7 @@
 #include <locale>
 #include <typeinfo>
 #include <boost/cstdint.hpp>
+#include <boost/locale/utf.hpp>
 #include <boost/locale/generator.hpp>
 #include <boost/assert.hpp>
 
@@ -81,13 +82,13 @@
         /// For example if a UCS-32 code-point is in the range reserved for UTF-16 surrogates
         /// or an invalid UTF-8 sequence is found
         ///
- static const uint32_t illegal=0xFFFFFFFF;
+ static const uint32_t illegal=utf::illegal;
 
         ///
         /// This value is returned in following cases: The of incomplete input sequence was found or
         /// insufficient output buffer was provided so complete output could not be written.
         ///
- static const uint32_t incomplete=0xFFFFFFFE;
+ static const uint32_t incomplete=utf::incomplete;
         
         virtual ~base_converter()
         {

Modified: trunk/libs/locale/src/encoding/wconv_codepage.ipp
==============================================================================
--- trunk/libs/locale/src/encoding/wconv_codepage.ipp (original)
+++ trunk/libs/locale/src/encoding/wconv_codepage.ipp 2011-08-01 04:04:27 EDT (Mon, 01 Aug 2011)
@@ -202,19 +202,11 @@
     template<typename CharType>
     bool validate_utf16(CharType const *str,unsigned len)
     {
- for(unsigned i=0;i<len;i++) {
- uint16_t c = static_cast<uint16_t>(str[i]);
-
- if(0xD800 <= c && c<= 0xDBFF) {
- i++;
- if(i>=len)
- return false;
- c=static_cast<uint16_t>(str[i]);
- if(0xDC00 <= c && c <= 0xDFFF)
- continue;
- return false;
- }
- else if(0xDC00 <= c && c <=0xDFFF)
+ CharType const *begin = str;
+ CharType const *end = str+len;
+ while(begin!=end) {
+ utf::code_point c = utf::utf_traits<CharType,2>::template decode(begin,end);
+ if(c==utf::illegal || c==utf::incomplete)
                 return false;
         }
         return true;
@@ -436,30 +428,10 @@
             multibyte_to_wide(code_page_,begin,end,how_ == skip,buf);
             remove_substitutions(buf);
 
- size_t n=buf.size();
- string_type res;
- res.reserve(n);
- for(unsigned i=0;i<n;i++) {
- wchar_t cur = buf[i];
- if(0xD800 <= cur && cur<= 0xDBFF) {
- i++;
- if(i>=n)
- throw conversion_error();
- if(0xDC00 <= buf[i] && buf[i]<=0xDFFF) {
- uint32_t w1 = cur;
- uint32_t w2 = buf[i];
- uint32_t norm = ((uint32_t(w1 & 0x3FF) << 10) | (w2 & 0x3FF)) + 0x10000;
- res+=char_type(norm);
- }
- else
- throw conversion_error();
- }
- else if(0xDC00 <= cur && cur<=0xDFFF)
- throw conversion_error();
- else
- res+=char_type(cur);
- }
- return res;
+ if(buf.empty())
+ return string_type();
+
+ return utf_to_utf<CharType>(&buf[0],&buf[0]+buf.size(),how_);
         }
     private:
         method_type how_;
@@ -488,27 +460,7 @@
 
         virtual std::string convert(CharType const *begin,CharType const *end)
         {
- std::wstring tmp;
- tmp.reserve(end-begin);
- while(begin!=end) {
- uint32_t cur = *begin++;
- if(cur > 0x10FFFF || (0xD800 <=cur && cur <=0xDFFF)) {
- if(how_ == skip)
- continue;
- else
- throw conversion_error();
- }
- if(cur > 0xFFFF) {
- uint32_t u = cur - 0x10000;
- wchar_t first = 0xD800 | (u>>10);
- wchar_t second = 0xDC00 | (u & 0x3FF);
- tmp+=first;
- tmp+=second;
- }
- else {
- tmp+=wchar_t(cur);
- }
- }
+ std::wstring tmp = utf_to_utf<wchar_t>(begin,end,how_);
 
             std::vector<char> ctmp;
             wide_to_multibyte(code_page_,tmp.c_str(),tmp.c_str()+tmp.size(),how_ == skip,ctmp);

Modified: trunk/libs/locale/src/util/codecvt_converter.cpp
==============================================================================
--- trunk/libs/locale/src/util/codecvt_converter.cpp (original)
+++ trunk/libs/locale/src/util/codecvt_converter.cpp 2011-08-01 04:04:27 EDT (Mon, 01 Aug 2011)
@@ -51,143 +51,30 @@
 
         virtual uint32_t to_unicode(char const *&begin,char const *end)
         {
- unsigned char const *p=reinterpret_cast<unsigned char const *>(begin);
- unsigned char const *e=reinterpret_cast<unsigned char const *>(end);
- if(p==e)
- return incomplete;
- unsigned char c=*p++;
- unsigned char seq0,seq1=0,seq2=0,seq3=0;
- seq0=c;
- int len=1;
- if((c & 0xC0) == 0xC0) {
- if(p==e)
- return incomplete;
- seq1=*p++;
- len=2;
- }
- if((c & 0xE0) == 0xE0) {
- if(p==e)
- return incomplete;
- seq2=*p++;
- len=3;
- }
- if((c & 0xF0) == 0xF0) {
- if(p==e)
- return incomplete;
- seq3=*p++;
- len=4;
- }
- switch(len) {
- case 1:
- if(seq0 <= 0x7F)
- break;
- return illegal;
- case 2: // non-overloading 2 bytes
- if( 0xC2 <= seq0 && seq0 <= 0xDF
- && 0x80 <= seq1 && seq1<= 0xBF)
- {
- break;
- }
- return illegal;
- case 3:
- if(seq0==0xE0) { // exclude overloading
- if(0xA0 <=seq1 && seq1<= 0xBF && 0x80 <=seq2 && seq2<=0xBF)
- break;
- }
- else if( (0xE1 <= seq0 && seq0 <=0xEC) || seq0==0xEE || seq0==0xEF) { // stright 3 bytes
- if(0x80 <=seq1 && seq1<=0xBF &&
- 0x80 <=seq2 && seq2<=0xBF)
- break;
- }
- else if(seq0 == 0xED) { // exclude surrogates
- if( 0x80 <=seq1 && seq1<=0x9F &&
- 0x80 <=seq2 && seq2<=0xBF)
- break;
- }
+ char const *p=begin;
+
+ utf::code_point c = utf::utf_traits<char>::decode(p,end);
+
+ if(c==utf::illegal)
                 return illegal;
- case 4:
- switch(seq0) {
- case 0xF0: // planes 1-3
- if( 0x90 <=seq1 && seq1<=0xBF &&
- 0x80 <=seq2 && seq2<=0xBF &&
- 0x80 <=seq3 && seq3<=0xBF)
- break;
- return illegal;
- case 0xF1: // planes 4-15
- case 0xF2:
- case 0xF3:
- if( 0x80 <=seq1 && seq1<=0xBF &&
- 0x80 <=seq2 && seq2<=0xBF &&
- 0x80 <=seq3 && seq3<=0xBF)
- break;
- return illegal;
- case 0xF4: // pane 16
- if( 0x80 <=seq1 && seq1<=0x8F &&
- 0x80 <=seq2 && seq2<=0xBF &&
- 0x80 <=seq3 && seq3<=0xBF)
- break;
- return illegal;
- default:
- return illegal;
- }
- }
- begin=reinterpret_cast<char const *>(p);
- switch(len) {
- case 1:
- return seq0;
- case 2:
- return ((seq0 & 0x1F) << 6) | (seq1 & 0x3F);
- case 3:
- return ((seq0 & 0x0F) << 12) | ((seq1 & 0x3F) << 6) | (seq2 & 0x3F) ;
- default: // can be only 4
- return ((seq0 & 0x07) << 18) | ((seq1 & 0x3F) << 12) | ((seq2 & 0x3F) << 6) | (seq3 & 0x3F) ;
- }
+
+ if(c==utf::incomplete)
+ return incomplete;
+
+ begin = p;
+ return c;
         }
+
         virtual uint32_t from_unicode(uint32_t u,char *begin,char const *end)
         {
- if(u>0x10ffff)
- return illegal;
- if(0xd800 <=u && u<= 0xdfff) // surrogates
+ if(!utf::is_valid_codepoint(u))
                 return illegal;
+ int width = utf::utf_traits<char>::width(u);
             ptrdiff_t d=end-begin;
- if(u <=0x7F) {
- if(d>=1) {
- *begin++=u;
- return 1;
- }
- else
- return incomplete;
- }
- else if(u <= 0x7FF) {
- if(d>=2) {
- *begin++=(u >> 6) | 0xC0;
- *begin++=(u & 0x3F) | 0x80;
- return 2;
- }
- else
- return incomplete;
- }
- else if(u <= 0xFFFF) {
- if(d>=3) {
- *begin++=(u >> 12) | 0xE0;
- *begin++=((u >> 6) & 0x3F) | 0x80;
- *begin++=(u & 0x3F) | 0x80;
- return 3;
- }
- else
- return incomplete;
- }
- else {
- if(d>=4) {
- *begin++=(u >> 18) | 0xF0;
- *begin++=((u >> 12) & 0x3F) | 0x80;
- *begin++=((u >> 6) & 0x3F) | 0x80;
- *begin++=(u & 0x3F) | 0x80;
- return 4;
- }
- else
- return incomplete;
- }
+ if(d < width)
+ return incomplete;
+ utf::utf_traits<char>::encode(u,begin);
+ return width;
         }
     }; // utf8_converter
 

Modified: trunk/libs/locale/test/Jamfile.v2
==============================================================================
--- trunk/libs/locale/test/Jamfile.v2 (original)
+++ trunk/libs/locale/test/Jamfile.v2 2011-08-01 04:04:27 EDT (Mon, 01 Aug 2011)
@@ -24,6 +24,7 @@
         # Configuration Information
         [ run test_config.cpp : : : <test-info>always_show_run_output ]
         # Shared
+ [ run test_utf.cpp ]
         [ run test_date_time.cpp ]
         [ run test_ios_prop.cpp ]
         [ run test_codepage_converter.cpp ]

Added: trunk/libs/locale/test/test_utf.cpp
==============================================================================
--- (empty file)
+++ trunk/libs/locale/test/test_utf.cpp 2011-08-01 04:04:27 EDT (Mon, 01 Aug 2011)
@@ -0,0 +1,302 @@
+//
+// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
+//
+// Distributed under the Boost Software License, Version 1.0. (See
+// accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+#include "test_locale.hpp"
+#include "test_locale_tools.hpp"
+#include <boost/locale/utf.hpp>
+
+#include <string.h>
+
+using namespace boost::locale::utf;
+
+// Packs v into the two-byte UTF-8 bit pattern (110xxxxx 10xxxxxx) without
+// checking that v actually needs two bytes, so callers can also build
+// overlong forms. Returns a pointer to a static zero-terminated buffer
+// that is overwritten by the next call.
+char *make2(unsigned v)
+{
+    static unsigned char buf[3] = {0};
+    unsigned const lead_bits  = v >> 6;
+    unsigned const trail_bits = v & 0x3F;
+    buf[0] = 0xC0 | lead_bits;
+    buf[1] = 0x80 | trail_bits;
+    return reinterpret_cast<char*>(buf);
+}
+
+// Packs v into the three-byte UTF-8 bit pattern (1110xxxx 10xxxxxx 10xxxxxx)
+// without validating the value, so overlong forms and surrogates can be
+// produced on purpose. Returns a pointer to a static zero-terminated buffer
+// that is overwritten by the next call.
+char *make3(unsigned v)
+{
+    static unsigned char buf[4] = {0};
+    unsigned const top    = v >> 12;
+    unsigned const middle = (v >> 6) & 0x3F;
+    unsigned const low    = v & 0x3F;
+    buf[0] = 0xE0 | top;
+    buf[1] = 0x80 | middle;
+    buf[2] = 0x80 | low;
+    return reinterpret_cast<char*>(buf);
+}
+
+// Packs v into the four-byte UTF-8 bit pattern
+// (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx) without validating the value, so
+// overlong and out-of-range forms can be produced on purpose. Returns a
+// pointer to a static zero-terminated buffer overwritten by the next call.
+char *make4(unsigned v)
+{
+    static unsigned char buf[5] = {0};
+    buf[0] = 0xF0 | (v >> 18);
+    // Continuation bytes carry six payload bits each, most significant first.
+    int shift = 12;
+    for(int i = 1; i <= 3; ++i) {
+        buf[i] = 0x80 | ((v >> shift) & 0x3F);
+        shift -= 6;
+    }
+    return reinterpret_cast<char*>(buf);
+}
+
+// Wraps one 32-bit unit in a zero-terminated sequence so it can be passed to
+// the pointer-based test helpers. Returns a pointer to a static buffer that
+// is overwritten by the next call.
+boost::uint32_t const *u32_seq(boost::uint32_t a)
+{
+    // Same spelling as the signature: an unqualified uint32_t only compiles
+    // when a global typedef exists and names the same type.
+    static boost::uint32_t buf[2];
+    buf[0] = a;
+    buf[1] = 0;
+    return buf;
+}
+
+// Wraps one 16-bit unit in a zero-terminated sequence so it can be passed to
+// the pointer-based test helpers. Returns a pointer to a static buffer that
+// is overwritten by the next call to this overload.
+boost::uint16_t const *u16_seq(boost::uint16_t a)
+{
+    // Same spelling as the signature: an unqualified uint16_t only compiles
+    // when a global typedef exists and names the same type.
+    static boost::uint16_t buf[2];
+    buf[0] = a;
+    buf[1] = 0;
+    return buf;
+}
+
+// Wraps two 16-bit units (used by the tests for surrogate pairs) in a
+// zero-terminated sequence. Returns a pointer to a static buffer that is
+// overwritten by the next call to this overload.
+boost::uint16_t const *u16_seq(boost::uint16_t a,boost::uint16_t b)
+{
+    // Same spelling as the signature: an unqualified uint16_t only compiles
+    // when a global typedef exists and names the same type.
+    static boost::uint16_t buf[3];
+    buf[0] = a;
+    buf[1] = b;
+    buf[2] = 0;
+    return buf;
+}
+
+// Decoding check: decodes the zero-terminated sequence s with
+// utf_traits<CharType> and verifies that the result equals codepoint
+// (which may also be the `illegal` or `incomplete` marker).
+//
+// For a real code point it additionally cross-checks is_lead / is_trail,
+// width, trail_length and decode_valid against the same sequence.
+template<typename CharType>
+void test_to(CharType const *s,unsigned codepoint)
+{
+    CharType const *begin = s;
+    CharType const *end = begin;
+
+    // The sequence is terminated by a zero unit.
+    while(*end)
+        end++;
+
+    typedef utf_traits<CharType> tr;
+
+    TEST(tr::max_width == 4 / sizeof(CharType));
+
+    // No `template` keyword here: without an explicit template argument list
+    // the disambiguator is ill-formed, and plain deduction is sufficient.
+    TEST(tr::decode(begin,end) == codepoint);
+
+    // Unless the input was rejected as illegal, the whole input must have
+    // been consumed.
+    if(codepoint == incomplete || codepoint != illegal)
+        TEST(end == begin);
+
+    if(codepoint == incomplete) {
+        // Either the input is empty or its first unit announces trailing
+        // units, more of them than the input actually holds.
+        TEST(*s== 0 || 0 < tr::trail_length(*s));
+        TEST(tr::trail_length(*s) + 1 > end - s);
+    }
+
+    if(codepoint != incomplete && codepoint != illegal) {
+        // First unit is a lead, every following unit is a trail.
+        begin=s;
+        TEST(tr::is_lead(*begin));
+        TEST(!tr::is_trail(*begin));
+        begin++;
+        while(begin!=end) {
+            TEST(tr::is_trail(*begin));
+            TEST(!tr::is_lead(*begin));
+            begin++;
+        }
+        // Length reported for the code point and for the lead unit must
+        // agree with the actual sequence length.
+        TEST(tr::width(codepoint)==end - s);
+        TEST(tr::trail_length(*s) == tr::width(codepoint) - 1);
+        // The unchecked decoder must yield the same value and consume the
+        // same number of units.
+        begin = s;
+        TEST(tr::decode_valid(begin) == codepoint);
+        TEST(begin == end);
+    }
+}
+
+// Encoding check: encodes codepoint with utf_traits<CharType> into a scratch
+// buffer and verifies that the output has the same length and contents as
+// the zero-terminated reference sequence str.
+template<typename CharType>
+void test_from(CharType const *str,unsigned codepoint)
+{
+    // Pre-filled with non-zero sentinels so an overrun past the encoded
+    // units can be detected below.
+    CharType buf[5] = {1,1,1,1,1};
+    CharType *p=buf;
+    // No `template` keyword here: without an explicit template argument list
+    // the disambiguator is ill-formed, and plain deduction is sufficient.
+    p = utf_traits<CharType>::encode(codepoint,p);
+    CharType const *end = str;
+    while(*end)
+        end++;
+    TEST(end - str == p-buf );
+    // The unit right after the encoded output must still hold its sentinel.
+    TEST(*p);
+    *p=0;
+    TEST(memcmp(str,buf,sizeof(CharType) * (end-str))==0);
+}
+
+
+// Test driver for utf_traits: for UTF-8, UTF-16 and UTF-32 it runs the
+// decoding checks (test_to) on valid, illegal and incomplete input and the
+// encoding checks (test_from) on valid code points.
+// Returns EXIT_FAILURE if a std::exception escapes the checks; otherwise the
+// result is produced by FINALIZE().
+int main()
+{
+    try {
+
+        std::cout << "Test UTF-8" << std::endl;
+        std::cout << "- From UTF-8" << std::endl;
+
+
+        std::cout << "-- Correct" << std::endl;
+
+        test_to("\x7f",0x7f);
+        test_to("\xc2\x80",0x80);
+        test_to("\xdf\xbf",0x7ff);
+        test_to("\xe0\xa0\x80",0x800);
+        test_to("\xef\xbf\xbf",0xffff);
+        test_to("\xf0\x90\x80\x80",0x10000);
+        test_to("\xf4\x8f\xbf\xbf",0x10ffff);
+
+        std::cout << "-- Too big" << std::endl;
+        test_to("\xf4\x9f\x80\x80",illegal); // 11 0000
+        test_to("\xfb\xbf\xbf\xbf",illegal); // 3ff ffff
+        test_to("\xf8\x90\x80\x80\x80",illegal); // 400 0000
+        test_to("\xfd\xbf\xbf\xbf\xbf\xbf",illegal); // 7fff ffff
+
+        std::cout << "-- Invalid length" << std::endl;
+
+        /// Sanity check: the makeN helpers produce decodable sequences
+        test_to(make2(0x80),0x80);
+        test_to(make2(0x7ff),0x7ff);
+
+        test_to(make3(0x800),0x800);
+        test_to(make3(0xffff),0xffff);
+
+        test_to(make4(0x10000),0x10000);
+        test_to(make4(0x10ffff),0x10ffff);
+
+        test_to(make4(0x110000),illegal);
+        test_to(make4(0x1fffff),illegal);
+
+        // Overlong forms: values encoded with more bytes than necessary
+        test_to(make2(0),illegal);
+        test_to(make3(0),illegal);
+        test_to(make4(0),illegal);
+        test_to(make2(0x7f),illegal);
+        test_to(make3(0x7f),illegal);
+        test_to(make4(0x7f),illegal);
+
+        test_to(make3(0x80),illegal);
+        test_to(make4(0x80),illegal);
+        test_to(make3(0x7ff),illegal);
+        test_to(make4(0x7ff),illegal);
+
+        test_to(make4(0x8000),illegal);
+        test_to(make4(0xffff),illegal);
+
+        std::cout << "-- Invalid surrogate" << std::endl;
+
+        test_to(make3(0xd800),illegal);
+        test_to(make3(0xdbff),illegal);
+        test_to(make3(0xdc00),illegal);
+        test_to(make3(0xdfff),illegal);
+
+        test_to(make4(0xd800),illegal);
+        test_to(make4(0xdbff),illegal);
+        test_to(make4(0xdc00),illegal);
+        test_to(make4(0xdfff),illegal);
+
+        std::cout <<"-- Incomplete" << std::endl;
+
+        test_to("",incomplete);
+
+        test_to("\x80",illegal);
+        test_to("\xc2",incomplete);
+
+        test_to("\xdf",incomplete);
+
+        test_to("\xe0",incomplete);
+        test_to("\xe0\xa0",incomplete);
+
+        test_to("\xef\xbf",incomplete);
+        test_to("\xef",incomplete);
+
+        test_to("\xf0\x90\x80",incomplete);
+        test_to("\xf0\x90",incomplete);
+        test_to("\xf0",incomplete);
+
+        test_to("\xf4\x8f\xbf",incomplete);
+        test_to("\xf4\x8f",incomplete);
+        test_to("\xf4",incomplete);
+
+        std::cout << "- To UTF-8" << std::endl;
+
+        std::cout << "-- Test correct" << std::endl;
+
+        test_from("\x7f",0x7f);
+        test_from("\xc2\x80",0x80);
+        test_from("\xdf\xbf",0x7ff);
+        test_from("\xe0\xa0\x80",0x800);
+        test_from("\xef\xbf\xbf",0xffff);
+        test_from("\xf0\x90\x80\x80",0x10000);
+        test_from("\xf4\x8f\xbf\xbf",0x10ffff);
+
+        std::cout << "Test UTF-16" << std::endl;
+        std::cout << "- From UTF-16" << std::endl;
+
+
+        std::cout << "-- Correct" << std::endl;
+
+        test_to(u16_seq(0x10),0x10);
+        test_to(u16_seq(0xffff),0xffff);
+        test_to(u16_seq(0xD800,0xDC00),0x10000);
+        test_to(u16_seq(0xDBFF,0xDFFF),0x10FFFF);
+
+
+        std::cout << "-- Invalid surrogate" << std::endl;
+
+        test_to(u16_seq(0xDFFF),illegal);
+        test_to(u16_seq(0xDC00),illegal);
+
+        std::cout <<"-- Incomplete" << std::endl;
+
+        test_to(u16_seq(0),incomplete);
+        test_to(u16_seq(0xD800),incomplete);
+        test_to(u16_seq(0xDBFF),incomplete);
+
+        std::cout << "- To UTF-16" << std::endl;
+
+        std::cout << "-- Test correct" << std::endl;
+
+        // Encoding direction: use test_from so that encode() is exercised
+        // for 16-bit units instead of repeating the decode checks above.
+        test_from(u16_seq(0x10),0x10);
+        test_from(u16_seq(0xffff),0xffff);
+        test_from(u16_seq(0xD800,0xDC00),0x10000);
+        test_from(u16_seq(0xDBFF,0xDFFF),0x10FFFF);
+
+
+        std::cout << "Test UTF-32" << std::endl;
+        std::cout << "- From UTF-32" << std::endl;
+
+
+        std::cout << "-- Correct" << std::endl;
+
+        test_to(u32_seq(0x10),0x10);
+        test_to(u32_seq(0xffff),0xffff);
+        test_to(u32_seq(0x10000),0x10000);
+        test_to(u32_seq(0x10ffff),0x10ffff);
+
+
+
+        std::cout << "-- Invalid surrogate" << std::endl;
+
+        test_to(u32_seq(0xD800),illegal);
+        test_to(u32_seq(0xDBFF),illegal);
+        test_to(u32_seq(0xDFFF),illegal);
+        test_to(u32_seq(0xDC00),illegal);
+        test_to(u32_seq(0x110000),illegal);
+
+        std::cout <<"-- Incomplete" << std::endl;
+
+        test_to(u32_seq(0),incomplete);
+
+        std::cout << "- To UTF-32" << std::endl;
+
+        std::cout << "-- Test correct" << std::endl;
+
+        // Encoding direction: use test_from so that encode() is exercised
+        // for 32-bit units instead of repeating the decode checks above.
+        test_from(u32_seq(0x10),0x10);
+        test_from(u32_seq(0xffff),0xffff);
+        test_from(u32_seq(0x10ffff),0x10ffff);
+
+
+
+    }
+    catch(std::exception const &e) {
+        std::cerr << "Failed " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+    FINALIZE();
+}
+
+// vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk