|
Boost-Commit : |
Subject: [Boost-commit] svn:boost r64344 - in sandbox/SOC/2009/unicode: boost/unicode libs/unicode/doc libs/unicode/test/iterator
From: loufoque_at_[hidden]
Date: 2010-07-25 19:13:08
Author: mgaunard
Date: 2010-07-25 19:13:07 EDT (Sun, 25 Jul 2010)
New Revision: 64344
URL: http://svn.boost.org/trac/boost/changeset/64344
Log:
fixing a few things with (de)composed concatenation
Text files modified:
sandbox/SOC/2009/unicode/boost/unicode/cat.hpp | 140 ++++++++++++++++++++--------------------
sandbox/SOC/2009/unicode/boost/unicode/combining.hpp | 45 ++++++++----
sandbox/SOC/2009/unicode/libs/unicode/doc/Jamfile.v2 | 2
sandbox/SOC/2009/unicode/libs/unicode/doc/users_manual.qbk | 9 --
sandbox/SOC/2009/unicode/libs/unicode/test/iterator/test_codecvt.cpp | 2
5 files changed, 101 insertions(+), 97 deletions(-)
Modified: sandbox/SOC/2009/unicode/boost/unicode/cat.hpp
==============================================================================
--- sandbox/SOC/2009/unicode/boost/unicode/cat.hpp (original)
+++ sandbox/SOC/2009/unicode/boost/unicode/cat.hpp 2010-07-25 19:13:07 EDT (Sun, 25 Jul 2010)
@@ -51,42 +51,24 @@
> \
cat_limits(cv1 Range1 ref1 range1, cv2 Range2 ref2 range2) \
{ \
- converted_range< \
- cv2 Range2, \
- utf_decoder \
- > decoded2 = adaptors::utf_decode(range2); \
- \
- if(!empty(decoded2) \
- && ucd::get_combining_class(*begin(decoded2)) != 0) \
+ if(!empty(range2) \
+ && ucd::get_combining_class(*begin(range2)) != 0) \
{ \
- converted_range< \
- cv1 Range1, \
- utf_decoder \
- > decoded1 = adaptors::utf_decode(range1); \
- \
- convert_iterator< \
- typename range_iterator<cv1 Range1>::type, \
- utf_decoder \
- > end = boost::end(decoded1); \
+ typename range_iterator<cv1 Range1>::type \
+ new_end = boost::end(range1); \
\
- convert_iterator< \
- typename range_iterator<cv2 Range2>::type, \
- utf_decoder \
- > begin = boost::begin(decoded2); \
+ typename range_iterator<cv2 Range2>::type \
+ new_begin = boost::begin(range2); \
\
combiner().rtl( \
- boost::begin(decoded1), \
- end \
+ boost::begin(range1), \
+ new_end \
); \
- typename range_iterator<cv1 Range1>::type \
- new_end = end.base(); \
\
combiner().ltr( \
- begin, \
- boost::end(decoded2) \
+ new_begin, \
+ boost::end(range2) \
); \
- typename range_iterator<cv2 Range2>::type \
- new_begin = begin.base(); \
\
return make_tuple( \
make_iterator_range(boost::begin(range1), new_end), \
@@ -108,9 +90,9 @@
#ifdef BOOST_UNICODE_DOXYGEN_INVOKED
/** INTERNAL ONLY */
#define BOOST_UNICODE_COMPOSE_CONCAT_DEF(name, nf, convert, n) \
-/** Concatenates two ranges of UTF code units and puts the result in \c out.
+/** Concatenates two ranges of code points and puts the result in \c out.
Throws \c std::out_of_range if the input or resulting strings are not stream-safe.
- \pre \c Range1 and \c Range2 are in Normalized Form nf, have the same value type and are non-empty.
+ \pre \c Range1 and \c Range2 are in Normalized Form nf and are non-empty.
\post \c out is in Normalized Form nf and is stream-safe. */ \
template<typename Range1, typename Range2, typename OutputIterator, typename... T> \
OutputIterator name##_concat(const Range1& range1, const Range2& range2, OutputIterator out, const T&... args);
@@ -131,7 +113,7 @@
t = cat_limits(range1, range2); \
\
out = copy(t.get<0>(), out); \
- out = convert(boost::join(t.get<1>(), t.get<2>()), make_converted_converter(utf_decoder(), BOOST_PP_SEQ_ELEM(1, seq)(BOOST_PP_ENUM_PARAMS(n, t))), adaptors::utf_encode_output<typename range_value<const Range1>::type>(out)).base(); \
+ out = convert(boost::join(t.get<1>(), t.get<2>()), BOOST_PP_SEQ_ELEM(1, seq)(BOOST_PP_ENUM_PARAMS(n, t)), out); \
return copy(t.get<3>(), out); \
}
#endif
@@ -139,61 +121,79 @@
#ifdef BOOST_UNICODE_DOXYGEN_INVOKED
/** INTERNAL ONLY */
#define BOOST_UNICODE_COMPOSE_CONCATED_DEF(name, nf, convert, n) \
-/** Concatenates two ranges of UTF code units and returns the result as a lazily
+/** Concatenates two ranges of code points and returns the result as a lazily
evaluated range.
Throws \c std::out_of_range if the input or resulting strings are not stream-safe.
- \pre \c Range1 and \c Range2 are in Normalized Form nf, have the same value type and are non-empty.
+ \pre \c Range1 and \c Range2 are in Normalized Form nf and are non-empty.
\return Lazy stream-safe range in Normalized Form nf. */ \
template<typename Range1, typename Range2, typename... T> \
-detail::unspecified<void> name##_concat(const Range1& range1, const Range2& range2, const T&... args);
+typename result_of::name##_concat<Range1, Range2>::type name##_concat(Range1&& range1, Range2&& range2, const T&... args);
#else
-#define BOOST_UNICODE_COMPOSE_CONCATED_DEF(name, nf, convert, n) \
+#define BOOST_UNICODE_COMPOSE_CONCATED_DEF(name, nf, convert, n) \
+namespace result_of \
+{ \
+ template<typename Range1, typename Range2> \
+ struct name##_concat \
+ { \
+ typedef \
+ joined_range< \
+ sub_range<Range1>, \
+ joined_range< \
+ converted_range< \
+ joined_range< \
+ sub_range<Range1>, \
+ sub_range<Range2> \
+ >, \
+ convert \
+ >, \
+ sub_range<Range2> \
+ > \
+ > type; \
+ }; \
+} \
BOOST_PP_REPEAT(BOOST_PP_INC(n), BOOST_UNICODE_COMPOSE_CONCATED_DEF_A, (name)(convert))
#define BOOST_UNICODE_COMPOSE_CONCATED_DEF_A(z, n, seq) \
template<typename Range1, typename Range2 BOOST_PP_COMMA_IF(n) BOOST_PP_ENUM_PARAMS(n, typename T)> \
-joined_range< \
- sub_range<const Range1>, \
- joined_range< \
- converted_range< \
- joined_range< \
- sub_range<const Range1>, \
- sub_range<const Range2> \
- >, \
- converted_converter< \
- utf_decoder, \
- multi_converter< \
- BOOST_PP_SEQ_ELEM(1, seq), \
- utf_encoder<typename range_value<const Range1>::type> \
- > \
- > \
- >, \
- sub_range<const Range2> \
- > \
-> BOOST_PP_CAT(BOOST_PP_SEQ_ELEM(0, seq), _concat)(const Range1& range1, const Range2& range2 BOOST_PP_COMMA_IF(n) BOOST_PP_ENUM_BINARY_PARAMS(n, const T, & t)) \
+typename result_of::BOOST_PP_CAT(BOOST_PP_SEQ_ELEM(0, seq), _concat)<Range1, Range2>::type \
+BOOST_PP_CAT(BOOST_PP_SEQ_ELEM(0, seq), _concat)(Range1& range1, Range2& range2 BOOST_PP_COMMA_IF(n) BOOST_PP_ENUM_BINARY_PARAMS(n, const T, & t)) \
{ \
tuple< \
- sub_range<const Range1>, \
- sub_range<const Range1>, \
- sub_range<const Range2>, \
- sub_range<const Range2> \
+ sub_range<Range1>, \
+ sub_range<Range1>, \
+ sub_range<Range2>, \
+ sub_range<Range2> \
> \
t = cat_limits(range1, range2); \
\
return boost::join( \
t.get<0>(), \
- boost::join(boost::adaptors::convert( \
- boost::join(t.get<1>(), t.get<2>()), \
- make_converted_converter( \
- utf_decoder(), \
- make_multi_converter( \
- BOOST_PP_SEQ_ELEM(1, seq)(BOOST_PP_ENUM_PARAMS(n, t)), \
- utf_encoder<typename range_value<const Range1>::type>() \
- ) \
- ) \
- ), \
- t.get<3>() \
- )); \
+ boost::join( \
+ boost::adaptors::convert( \
+ boost::join(t.get<1>(), t.get<2>()), \
+ BOOST_PP_SEQ_ELEM(1, seq)(BOOST_PP_ENUM_PARAMS(n, t)) \
+ ), \
+ t.get<3>() \
+ ) \
+ ); \
+} \
+template<typename Range1, typename Range2 BOOST_PP_COMMA_IF(n) BOOST_PP_ENUM_PARAMS(n, typename T)> \
+typename result_of::BOOST_PP_CAT(BOOST_PP_SEQ_ELEM(0, seq), _concat)<const Range1, const Range2>::type \
+BOOST_PP_CAT(BOOST_PP_SEQ_ELEM(0, seq), _concat)(const Range1& range1, const Range2& range2 BOOST_PP_COMMA_IF(n) BOOST_PP_ENUM_BINARY_PARAMS(n, const T, & t)) \
+{ \
+ return BOOST_PP_CAT(BOOST_PP_SEQ_ELEM(0, seq), _concat)(range1, range2 BOOST_PP_COMMA_IF(n) BOOST_PP_ENUM_PARAMS(n, t)); \
+} \
+template<typename Range1, typename Range2 BOOST_PP_COMMA_IF(n) BOOST_PP_ENUM_PARAMS(n, typename T)> \
+typename result_of::BOOST_PP_CAT(BOOST_PP_SEQ_ELEM(0, seq), _concat)<const Range1, Range2>::type \
+BOOST_PP_CAT(BOOST_PP_SEQ_ELEM(0, seq), _concat)(const Range1& range1, Range2& range2 BOOST_PP_COMMA_IF(n) BOOST_PP_ENUM_BINARY_PARAMS(n, const T, & t)) \
+{ \
+ return BOOST_PP_CAT(BOOST_PP_SEQ_ELEM(0, seq), _concat)(range1, range2 BOOST_PP_COMMA_IF(n) BOOST_PP_ENUM_PARAMS(n, t)); \
+} \
+template<typename Range1, typename Range2 BOOST_PP_COMMA_IF(n) BOOST_PP_ENUM_PARAMS(n, typename T)> \
+typename result_of::BOOST_PP_CAT(BOOST_PP_SEQ_ELEM(0, seq), _concat)<Range1, const Range2>::type \
+BOOST_PP_CAT(BOOST_PP_SEQ_ELEM(0, seq), _concat)(Range1& range1, const Range2& range2 BOOST_PP_COMMA_IF(n) BOOST_PP_ENUM_BINARY_PARAMS(n, const T, & t)) \
+{ \
+ return BOOST_PP_CAT(BOOST_PP_SEQ_ELEM(0, seq), _concat)(range1, range2 BOOST_PP_COMMA_IF(n) BOOST_PP_ENUM_PARAMS(n, t)); \
}
#endif
Modified: sandbox/SOC/2009/unicode/boost/unicode/combining.hpp
==============================================================================
--- sandbox/SOC/2009/unicode/boost/unicode/combining.hpp (original)
+++ sandbox/SOC/2009/unicode/boost/unicode/combining.hpp 2010-07-25 19:13:07 EDT (Sun, 25 Jul 2010)
@@ -4,6 +4,7 @@
#include <boost/config.hpp>
#include <boost/iterator/segment_iterator.hpp>
#include <boost/cuchar.hpp>
+#include <boost/unicode/utf.hpp>
#include <algorithm>
#include <boost/range/adaptor/reversed.hpp>
@@ -81,30 +82,41 @@
} // namespace detail
-/** Model of \c \xmlonly<conceptname>Segmenter</conceptname>\endxmlonly
- * that segments combining character sequences. */
-struct combiner
+struct combine_boundary
{
typedef char32 input_type;
- typedef char32 output_type;
-
- template<typename Iterator>
- void ltr(Iterator& begin, Iterator end)
- {
- do
- {
- ++begin;
- }
- while(begin != end && ucd::get_combining_class(*begin) != 0);
- }
template<typename Iterator>
- void rtl(Iterator begin, Iterator& end)
+ bool operator()(Iterator begin, Iterator end, Iterator pos)
{
- while(end != begin && ucd::get_combining_class(*--end) != 0);
+ return ucd::get_combining_class(*pos) == 0;
}
};
+/** Model of \c \xmlonly<conceptname>Segmenter</conceptname>\endxmlonly
+ * that segments combining character sequences. */
+typedef boost::detail::unspecified<
+ boost::boundary_segmenter<combine_boundary>
+>::type combiner;
+
+/** INTERNAL ONLY */
+#define BOOST_UNICODE_COMBINE_DEF(codec) \
+typedef boost::detail::unspecified< \
+ boost::multi_boundary< \
+ codec##_boundary, \
+ codec##_decoder, \
+ combine_boundary \
+ > \
+>::type codec##_combine_boundary; \
+typedef boost::detail::unspecified< \
+ boost::boundary_segmenter<codec##_combine_boundary> \
+>::type codec##_combiner; \
+BOOST_SEGMENTER_DEF(BOOST_UNICODE_CAT(boost::unicode, codec##_combiner), codec##_combine_segment)
+
+BOOST_UNICODE_COMBINE_DEF(u8)
+BOOST_UNICODE_COMBINE_DEF(u16)
+BOOST_UNICODE_COMBINE_DEF(utf)
+
struct combine_sorter
{
typedef char32 input_type;
@@ -179,6 +191,7 @@
};
BOOST_SEGMENTER_DEF(combiner, combine)
+BOOST_SEGMENTER_DEF(combine_sorter, combine_sort)
} // namespace unicode
} // namespace boost
Modified: sandbox/SOC/2009/unicode/libs/unicode/doc/Jamfile.v2
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/doc/Jamfile.v2 (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/doc/Jamfile.v2 2010-07-25 19:13:07 EDT (Sun, 25 Jul 2010)
@@ -30,7 +30,7 @@
<doxygen:param>EXPAND_ONLY_PREDEF=YES
<doxygen:param>"EXPAND_AS_DEFINED=BOOST_UNICODE_ENCODER_DEF BOOST_UNICODE_DECODER_DEF \\
BOOST_ONEMANYCONVERTER_DEF BOOST_CONVERTER_DEF BOOST_SEGMENTER_DEF \\
- BOOST_UNICODE_GRAPHEME_DEF BOOST_UNICODE_GET_PROPERTY_DEF \\
+ BOOST_UNICODE_GRAPHEME_DEF BOOST_UNICODE_COMBINE_DEF BOOST_UNICODE_GET_PROPERTY_DEF \\
BOOST_UNICODE_FWD_2 BOOST_UNICODE_CAT_LIMITS_FWD BOOST_UNICODE_CAT \\
BOOST_UNICODE_COMPOSE_CONCAT_DEF BOOST_UNICODE_COMPOSE_CONCATED_DEF BOOST_UNICODE_COMPOSE_CAT_DEF"
<doxygen:param>SEARCH_INCLUDES=YES
Modified: sandbox/SOC/2009/unicode/libs/unicode/doc/users_manual.qbk
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/doc/users_manual.qbk (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/doc/users_manual.qbk 2010-07-25 19:13:07 EDT (Sun, 25 Jul 2010)
@@ -850,11 +850,6 @@
[section Appendix C: Future Work]
-[heading Type deduction]
-A type deduction, similar to that of Boost.Fusion =result_of= namespace,
-would be useful as a lot of the functions do not have a specified return
-type.
-
[heading Non-checked UTF conversion]
The library only provides UTF conversion converters that do extensive checking
that the input is correct and that the end is not unexpectedly met.
@@ -868,10 +863,6 @@
is in a normalized form, which could be used to avoid expensive decomposition
and recomposition.
-[heading Forwarding]
-Certain functions that take ranges and return them adapted do not perform
-correct forwarding, and return an adapted const range instead.
-
[heading Unicode String type]
Future versions of the library could provide a string type that maintains
the following invariants: valid UTF, stream-safe and in Normalization Form C.
Modified: sandbox/SOC/2009/unicode/libs/unicode/test/iterator/test_codecvt.cpp
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/test/iterator/test_codecvt.cpp (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/test/iterator/test_codecvt.cpp 2010-07-25 19:13:07 EDT (Sun, 25 Jul 2010)
@@ -19,7 +19,7 @@
wchar_t,
boost::unicode::utf_boundary,
boost::unicode::utf_transcoder<char>,
- boost::unicode::utf_boundary, // wrong, we want utf_combine_boundary
+ boost::unicode::utf_combine_boundary,
boost::multi_converter<
boost::converted_converter<boost::unicode::utf_decoder, boost::unicode::normalizer>,
boost::unicode::utf_encoder<wchar_t>
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk