Boost logo

Boost-Commit :

From: eric_at_[hidden]
Date: 2008-08-18 17:37:24


Author: eric_niebler
Date: 2008-08-18 17:37:24 EDT (Mon, 18 Aug 2008)
New Revision: 48201
URL: http://svn.boost.org/trac/boost/changeset/48201

Log:
merged xpressive fixes from trunk for 1.37
Properties modified:
   branches/release/ (props changed)
Text files modified:
   branches/release/boost/xpressive/detail/dynamic/parser_enum.hpp | 4 +
   branches/release/boost/xpressive/detail/dynamic/parser_traits.hpp | 23 ++++++-
   branches/release/boost/xpressive/detail/utility/hash_peek_bitset.hpp | 5 +
   branches/release/boost/xpressive/regex_compiler.hpp | 11 ++-
   branches/release/libs/xpressive/doc/static_regexes.qbk | 113 +++++++++++++++++++++++++--------------
   branches/release/libs/xpressive/test/regress.txt | 10 +++
   6 files changed, 116 insertions(+), 50 deletions(-)

Modified: branches/release/boost/xpressive/detail/dynamic/parser_enum.hpp
==============================================================================
--- branches/release/boost/xpressive/detail/dynamic/parser_enum.hpp (original)
+++ branches/release/boost/xpressive/detail/dynamic/parser_enum.hpp 2008-08-18 17:37:24 EDT (Mon, 18 Aug 2008)
@@ -35,6 +35,10 @@
     token_charset_backspace, // \b
     token_posix_charset_begin, // [:
     token_posix_charset_end, // :]
+ token_equivalence_class_begin, // [=
+ token_equivalence_class_end, // =]
+ token_collation_element_begin, // [.
+ token_collation_element_end, // .]
 
     token_quote_meta_begin, // \Q
     token_quote_meta_end, // \E

Modified: branches/release/boost/xpressive/detail/dynamic/parser_traits.hpp
==============================================================================
--- branches/release/boost/xpressive/detail/dynamic/parser_traits.hpp (original)
+++ branches/release/boost/xpressive/detail/dynamic/parser_traits.hpp 2008-08-18 17:37:24 EDT (Mon, 18 Aug 2008)
@@ -115,13 +115,13 @@
         case BOOST_XPR_CHAR_(char_type, ')'): ++begin; return token_group_end;
         case BOOST_XPR_CHAR_(char_type, '|'): ++begin; return token_alternate;
         case BOOST_XPR_CHAR_(char_type, '['): ++begin; return token_charset_begin;
- case BOOST_XPR_CHAR_(char_type, ']'): ++begin; return token_charset_end;
 
         case BOOST_XPR_CHAR_(char_type, '*'):
         case BOOST_XPR_CHAR_(char_type, '+'):
         case BOOST_XPR_CHAR_(char_type, '?'):
             return token_invalid_quantifier;
 
+ case BOOST_XPR_CHAR_(char_type, ']'):
         case BOOST_XPR_CHAR_(char_type, '{'):
         default:
             return token_literal;
@@ -299,10 +299,25 @@
         case BOOST_XPR_CHAR_(char_type, '['):
             {
                 FwdIter next = begin; ++next;
- if(next != end && *next == BOOST_XPR_CHAR_(char_type, ':'))
+ if(next != end)
                 {
- begin = ++next;
- return token_posix_charset_begin;
+ detail::ensure(
+ *next != BOOST_XPR_CHAR_(char_type, '=')
+ , error_collate
+ , "equivalence classes are not yet supported"
+ );
+
+ detail::ensure(
+ *next != BOOST_XPR_CHAR_(char_type, '.')
+ , error_collate
+ , "collation sequences are not yet supported"
+ );
+
+ if(*next == BOOST_XPR_CHAR_(char_type, ':'))
+ {
+ begin = ++next;
+ return token_posix_charset_begin;
+ }
                 }
             }
             break;

Modified: branches/release/boost/xpressive/detail/utility/hash_peek_bitset.hpp
==============================================================================
--- branches/release/boost/xpressive/detail/utility/hash_peek_bitset.hpp (original)
+++ branches/release/boost/xpressive/detail/utility/hash_peek_bitset.hpp 2008-08-18 17:37:24 EDT (Mon, 18 Aug 2008)
@@ -104,7 +104,10 @@
 
     void set_bitset(hash_peek_bitset<Char> const &that)
     {
- this->bset_ |= that.bset_;
+ if(this->test_icase_(that.icase()))
+ {
+ this->bset_ |= that.bset_;
+ }
     }
 
     void set_charset(basic_chset_8bit<Char> const &that, bool icase)

Modified: branches/release/boost/xpressive/regex_compiler.hpp
==============================================================================
--- branches/release/boost/xpressive/regex_compiler.hpp (original)
+++ branches/release/boost/xpressive/regex_compiler.hpp 2008-08-18 17:37:24 EDT (Mon, 18 Aug 2008)
@@ -23,6 +23,8 @@
 #include <boost/mpl/assert.hpp>
 #include <boost/throw_exception.hpp>
 #include <boost/type_traits/is_same.hpp>
+#include <boost/type_traits/is_pointer.hpp>
+#include <boost/utility/enable_if.hpp>
 #include <boost/iterator/iterator_traits.hpp>
 #include <boost/xpressive/basic_regex.hpp>
 #include <boost/xpressive/detail/dynamic/parser.hpp>
@@ -111,7 +113,8 @@
     /// \throw regex_error when the range of characters has invalid regular
     /// expression syntax.
     template<typename InputIter>
- basic_regex<BidiIter> compile(InputIter begin, InputIter end, flag_type flags = regex_constants::ECMAScript)
+ basic_regex<BidiIter>
+ compile(InputIter begin, InputIter end, flag_type flags = regex_constants::ECMAScript)
     {
         typedef typename iterator_category<InputIter>::type category;
         return this->compile_(begin, end, flags, category());
@@ -120,14 +123,16 @@
     /// \overload
     ///
     template<typename InputRange>
- basic_regex<BidiIter> compile(InputRange const &pat, flag_type flags = regex_constants::ECMAScript)
+ typename disable_if<is_pointer<InputRange>, basic_regex<BidiIter> >::type
+ compile(InputRange const &pat, flag_type flags = regex_constants::ECMAScript)
     {
         return this->compile(boost::begin(pat), boost::end(pat), flags);
     }
 
     /// \overload
     ///
- basic_regex<BidiIter> compile(char_type const *begin, flag_type flags = regex_constants::ECMAScript)
+ basic_regex<BidiIter>
+ compile(char_type const *begin, flag_type flags = regex_constants::ECMAScript)
     {
         BOOST_ASSERT(0 != begin);
         char_type const *end = begin + std::char_traits<char_type>::length(begin);

Modified: branches/release/libs/xpressive/doc/static_regexes.qbk
==============================================================================
--- branches/release/libs/xpressive/doc/static_regexes.qbk (original)
+++ branches/release/libs/xpressive/doc/static_regexes.qbk 2008-08-18 17:37:24 EDT (Mon, 18 Aug 2008)
@@ -141,60 +141,89 @@
 The table below lists the familiar regex constructs and their equivalents in
 static xpressive.
 
+[def _s1_ [globalref boost::xpressive::s1 s1]]
+[def _bos_ [globalref boost::xpressive::bos bos]]
+[def _eos_ [globalref boost::xpressive::eos eos]]
+[def _b_ [globalref boost::xpressive::_b _b]]
+[def _n_ [globalref boost::xpressive::_n _n]]
+[def _ln_ [globalref boost::xpressive::_ln _ln]]
+[def _d_ [globalref boost::xpressive::_d _d]]
+[def _w_ [globalref boost::xpressive::_w _w]]
+[def _s_ [globalref boost::xpressive::_s _s]]
+[def _alnum_ [globalref boost::xpressive::alnum alnum]]
+[def _alpha_ [globalref boost::xpressive::alpha alpha]]
+[def _blank_ [globalref boost::xpressive::blank blank]]
+[def _cntrl_ [globalref boost::xpressive::cntrl cntrl]]
+[def _digit_ [globalref boost::xpressive::digit digit]]
+[def _graph_ [globalref boost::xpressive::graph graph]]
+[def _lower_ [globalref boost::xpressive::lower lower]]
+[def _print_ [globalref boost::xpressive::print print]]
+[def _punct_ [globalref boost::xpressive::punct punct]]
+[def _space_ [globalref boost::xpressive::space space]]
+[def _upper_ [globalref boost::xpressive::upper upper]]
+[def _xdigit_ [globalref boost::xpressive::xdigit xdigit]]
+[def _set_ [globalref boost::xpressive::set set]]
+[def _repeat_ [funcref boost::xpressive::repeat repeat]]
+[def _range_ [funcref boost::xpressive::range range]]
+[def _icase_ [funcref boost::xpressive::icase icase]]
+[def _before_ [funcref boost::xpressive::before before]]
+[def _after_ [funcref boost::xpressive::after after]]
+[def _keep_ [funcref boost::xpressive::keep keep]]
+
 [table Perl syntax vs. Static xpressive syntax
     [[Perl] [Static xpressive] [Meaning]]
- [[[^.]] [`_`] [any character (assuming Perl's /s modifier).]]
+ [[[^.]] [[globalref boost::xpressive::_ `_`]] [any character (assuming Perl's /s modifier).]]
     [[[^ab]] [`a >> b`] [sequencing of [^a] and [^b] sub-expressions.]]
     [[[^a|b]] [`a | b`] [alternation of [^a] and [^b] sub-expressions.]]
- [[[^(a)]] [`(s1= a)`] [group and capture a back-reference.]]
+ [[[^(a)]] [`(_s1_= a)`] [group and capture a back-reference.]]
     [[[^(?:a)]] [`(a)`] [group and do not capture a back-reference.]]
- [[[^\1]] [`s1`] [a previously captured back-reference.]]
+ [[[^\1]] [`_s1_`] [a previously captured back-reference.]]
     [[[^a*]] [`*a`] [zero or more times, greedy.]]
     [[[^a+]] [`+a`] [one or more times, greedy.]]
     [[[^a?]] [`!a`] [zero or one time, greedy.]]
- [[[^a{n,m}]] [`repeat<n,m>(a)`] [between [^n] and [^m] times, greedy.]]
+ [[[^a{n,m}]] [`_repeat_<n,m>(a)`] [between [^n] and [^m] times, greedy.]]
     [[[^a*?]] [`-*a`] [zero or more times, non-greedy.]]
     [[[^a+?]] [`-+a`] [one or more times, non-greedy.]]
     [[[^a??]] [`-!a`] [zero or one time, non-greedy.]]
- [[[^a{n,m}?]] [`-repeat<n,m>(a)`] [between [^n] and [^m] times, non-greedy.]]
- [[[^^]] [`bos`] [beginning of sequence assertion.]]
- [[[^$]] [`eos`] [end of sequence assertion.]]
- [[[^\b]] [`_b`] [word boundary assertion.]]
- [[[^\B]] [`~_b`] [not word boundary assertion.]]
- [[[^\\n]] [`_n`] [literal newline.]]
- [[[^.]] [`~_n`] [any character except a literal newline (without Perl's /s modifier).]]
- [[[^\\r?\\n|\\r]] [`_ln`] [logical newline.]]
- [[[^\[^\\r\\n\]]] [`~_ln`] [any single character not a logical newline.]]
- [[[^\w]] [`_w`] [a word character, equivalent to set\[alnum | '_'\].]]
- [[[^\W]] [`~_w`] [not a word character, equivalent to ~set\[alnum | '_'\].]]
- [[[^\d]] [`_d`] [a digit character.]]
- [[[^\D]] [`~_d`] [not a digit character.]]
- [[[^\s]] [`_s`] [a space character.]]
- [[[^\S]] [`~_s`] [not a space character.]]
- [[[^\[:alnum:\]]] [`alnum`] [an alpha-numeric character.]]
- [[[^\[:alpha:\]]] [`alpha`] [an alphabetic character.]]
- [[[^\[:blank:\]]] [`blank`] [a horizontal white-space character.]]
- [[[^\[:cntrl:\]]] [`cntrl`] [a control character.]]
- [[[^\[:digit:\]]] [`digit`] [a digit character.]]
- [[[^\[:graph:\]]] [`graph`] [a graphable character.]]
- [[[^\[:lower:\]]] [`lower`] [a lower-case character.]]
- [[[^\[:print:\]]] [`print`] [a printing character.]]
- [[[^\[:punct:\]]] [`punct`] [a punctuation character.]]
- [[[^\[:space:\]]] [`space`] [a white-space character.]]
- [[[^\[:upper:\]]] [`upper`] [an upper-case character.]]
- [[[^\[:xdigit:\]]] [`xdigit`] [a hexadecimal digit character.]]
- [[[^\[0-9\]]] [`range('0','9')`] [characters in range `'0'` through `'9'`.]]
+ [[[^a{n,m}?]] [`-_repeat_<n,m>(a)`] [between [^n] and [^m] times, non-greedy.]]
+ [[[^^]] [`_bos_`] [beginning of sequence assertion.]]
+ [[[^$]] [`_eos_`] [end of sequence assertion.]]
+ [[[^\b]] [`_b_`] [word boundary assertion.]]
+ [[[^\B]] [`~_b_`] [not word boundary assertion.]]
+ [[[^\\n]] [`_n_`] [literal newline.]]
+ [[[^.]] [`~_n_`] [any character except a literal newline (without Perl's /s modifier).]]
+ [[[^\\r?\\n|\\r]] [`_ln_`] [logical newline.]]
+ [[[^\[^\\r\\n\]]] [`~_ln_`] [any single character not a logical newline.]]
+ [[[^\w]] [`_w_`] [a word character, equivalent to set\[alnum | '_'\].]]
+ [[[^\W]] [`~_w_`] [not a word character, equivalent to ~set\[alnum | '_'\].]]
+ [[[^\d]] [`_d_`] [a digit character.]]
+ [[[^\D]] [`~_d_`] [not a digit character.]]
+ [[[^\s]] [`_s_`] [a space character.]]
+ [[[^\S]] [`~_s_`] [not a space character.]]
+ [[[^\[:alnum:\]]] [`_alnum_`] [an alpha-numeric character.]]
+ [[[^\[:alpha:\]]] [`_alpha_`] [an alphabetic character.]]
+ [[[^\[:blank:\]]] [`_blank_`] [a horizontal white-space character.]]
+ [[[^\[:cntrl:\]]] [`_cntrl_`] [a control character.]]
+ [[[^\[:digit:\]]] [`_digit_`] [a digit character.]]
+ [[[^\[:graph:\]]] [`_graph_`] [a graphable character.]]
+ [[[^\[:lower:\]]] [`_lower_`] [a lower-case character.]]
+ [[[^\[:print:\]]] [`_print_`] [a printing character.]]
+ [[[^\[:punct:\]]] [`_punct_`] [a punctuation character.]]
+ [[[^\[:space:\]]] [`_space_`] [a white-space character.]]
+ [[[^\[:upper:\]]] [`_upper_`] [an upper-case character.]]
+ [[[^\[:xdigit:\]]] [`_xdigit_`] [a hexadecimal digit character.]]
+ [[[^\[0-9\]]] [`_range_('0','9')`] [characters in range `'0'` through `'9'`.]]
     [[[^\[abc\]]] [`as_xpr('a') | 'b' |'c'`] [characters `'a'`, `'b'`, or `'c'`.]]
- [[[^\[abc\]]] [`(set= 'a','b','c')`] [['same as above]]]
- [[[^\[0-9abc\]]] [`set[ range('0','9') | 'a' | 'b' | 'c' ]`] [characters `'a'`, `'b'`, `'c'` or in range `'0'` through `'9'`.]]
- [[[^\[0-9abc\]]] [`set[ range('0','9') | (set= 'a','b','c') ]`] [['same as above]]]
- [[[^\[^abc\]]] [`~(set= 'a','b','c')`] [not characters `'a'`, `'b'`, or `'c'`.]]
- [[[^(?i:['stuff])]] [`icase(`[^['stuff]]`)`] [match ['stuff] disregarding case.]]
- [[[^(?>['stuff])]] [`keep(`[^['stuff]]`)`] [independent sub-expression, match ['stuff] and turn off backtracking.]]
- [[[^(?=['stuff])]] [`before(`[^['stuff]]`)`] [positive look-ahead assertion, match if before ['stuff] but don't include ['stuff] in the match.]]
- [[[^(?!['stuff])]] [`~before(`[^['stuff]]`)`] [negative look-ahead assertion, match if not before ['stuff].]]
- [[[^(?<=['stuff])]] [`after(`[^['stuff]]`)`] [positive look-behind assertion, match if after ['stuff] but don't include ['stuff] in the match. (['stuff] must be constant-width.)]]
- [[[^(?<!['stuff])]] [`~after(`[^['stuff]]`)`] [negative look-behind assertion, match if not after ['stuff]. (['stuff] must be constant-width.)]]
+ [[[^\[abc\]]] [`(_set_= 'a','b','c')`] [['same as above]]]
+ [[[^\[0-9abc\]]] [`_set_[ _range_('0','9') | 'a' | 'b' | 'c' ]`] [characters `'a'`, `'b'`, `'c'` or in range `'0'` through `'9'`.]]
+ [[[^\[0-9abc\]]] [`_set_[ _range_('0','9') | (_set_= 'a','b','c') ]`] [['same as above]]]
+ [[[^\[^abc\]]] [`~(_set_= 'a','b','c')`] [not characters `'a'`, `'b'`, or `'c'`.]]
+ [[[^(?i:['stuff])]] [`_icase_(`[^['stuff]]`)`] [match ['stuff] disregarding case.]]
+ [[[^(?>['stuff])]] [`_keep_(`[^['stuff]]`)`] [independent sub-expression, match ['stuff] and turn off backtracking.]]
+ [[[^(?=['stuff])]] [`_before_(`[^['stuff]]`)`] [positive look-ahead assertion, match if before ['stuff] but don't include ['stuff] in the match.]]
+ [[[^(?!['stuff])]] [`~_before_(`[^['stuff]]`)`] [negative look-ahead assertion, match if not before ['stuff].]]
+ [[[^(?<=['stuff])]] [`_after_(`[^['stuff]]`)`] [positive look-behind assertion, match if after ['stuff] but don't include ['stuff] in the match. (['stuff] must be constant-width.)]]
+ [[[^(?<!['stuff])]] [`~_after_(`[^['stuff]]`)`] [negative look-behind assertion, match if not after ['stuff]. (['stuff] must be constant-width.)]]
 ]
 \n
 

Modified: branches/release/libs/xpressive/test/regress.txt
==============================================================================
--- branches/release/libs/xpressive/test/regress.txt (original)
+++ branches/release/libs/xpressive/test/regress.txt 2008-08-18 17:37:24 EDT (Mon, 18 Aug 2008)
@@ -3738,3 +3738,13 @@
 flg=m
 br0=c
 [end]
+
+[nocasealternate]
+str=common HighLight and Blow this
+pat=highlight|blow
+flg=ig
+sub=<b>$&</b>
+res=common <b>HighLight</b> and <b>Blow</b> this
+br0=HighLight
+br1=Blow
+[end]


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk