Boost logo

Boost-Commit :

From: eric_at_[hidden]
Date: 2008-01-14 15:26:59


Author: eric_niebler
Date: 2008-01-14 15:26:58 EST (Mon, 14 Jan 2008)
New Revision: 42776
URL: http://svn.boost.org/trac/boost/changeset/42776

Log:
add skip(), for specifying a skip regex
Added:
   trunk/libs/xpressive/test/test_skip.cpp (contents, props changed)
Text files modified:
   trunk/boost/xpressive/regex_primitives.hpp | 129 ++++++++++++++++++++++++++++++++++++++++
   trunk/libs/xpressive/test/Jamfile.v2 | 1
   2 files changed, 130 insertions(+), 0 deletions(-)

Modified: trunk/boost/xpressive/regex_primitives.hpp
==============================================================================
--- trunk/boost/xpressive/regex_primitives.hpp (original)
+++ trunk/boost/xpressive/regex_primitives.hpp 2008-01-14 15:26:58 EST (Mon, 14 Jan 2008)
@@ -115,6 +115,93 @@
     #undef minus_one
     #endif
 
+ // replace "Expr" with "keep(*State) >> Expr"
+ struct skip_primitives : proto::callable
+ {
+ template<typename Sig>
+ struct result;
+
+ template<typename This, typename Expr, typename State, typename Visitor>
+ struct result<This(Expr, State, Visitor)>
+ : proto::shift_right<
+ typename proto::unary_expr<
+ keeper_tag
+ , typename proto::dereference<State>::type
+ >::type
+ , Expr
+ >
+ {};
+
+ template<typename Expr, typename State, typename Visitor>
+ typename result<void(Expr, State, Visitor)>::type
+ operator ()(Expr const &expr, State const &state, Visitor &visitor) const
+ {
+ typedef typename result<void(Expr, State, Visitor)>::type type;
+ type that = {{{state}}, expr};
+ return that;
+ }
+ };
+
+ struct Primitives
+ : proto::or_<
+ proto::terminal<proto::_>
+ , proto::comma<proto::_, proto::_>
+ , proto::subscript<proto::terminal<set_initializer>, proto::_>
+ , proto::assign<proto::terminal<set_initializer>, proto::_>
+ , proto::assign<proto::terminal<attribute_placeholder<proto::_> >, proto::_>
+ , proto::complement<Primitives>
+ >
+ {};
+
+ struct SkipGrammar
+ : proto::or_<
+ proto::when<Primitives, skip_primitives>
+ , proto::assign<proto::terminal<mark_placeholder>, SkipGrammar> // don't "skip" mark tags
+ , proto::subscript<SkipGrammar, proto::_> // don't put skips in actions
+ , proto::binary_expr<modifier_tag, proto::_, SkipGrammar> // don't skip modifiers
+ , proto::unary_expr<lookbehind_tag, proto::_> // don't skip lookbehinds
+ , proto::nary_expr<proto::_, proto::vararg<SkipGrammar> > // everything else is fair game!
+ >
+ {};
+
+ template<typename Skip>
+ struct skip_directive
+ {
+ typedef typename proto::result_of::as_expr<Skip>::type skip_type;
+
+ skip_directive(Skip const &skip)
+ : skip_(proto::as_expr(skip))
+ {}
+
+ template<typename Sig>
+ struct result;
+
+ template<typename This, typename Expr>
+ struct result<This(Expr)>
+ : proto::shift_right<
+ typename SkipGrammar::result<void(
+ typename proto::result_of::as_expr<Expr>::type
+ , skip_type
+ , mpl::void_
+ )>::type
+ , typename proto::dereference<skip_type>::type
+ >
+ {};
+
+ template<typename Expr>
+ typename result<skip_directive(Expr)>::type
+ operator ()(Expr const &expr) const
+ {
+ mpl::void_ ignore;
+ typedef typename result<skip_directive(Expr)>::type result_type;
+ result_type result = {SkipGrammar()(proto::as_expr(expr), this->skip_, ignore), {skip_}};
+ return result;
+ }
+
+ private:
+ skip_type skip_;
+ };
+
 /*
 ///////////////////////////////////////////////////////////////////////////////
 /// INTERNAL ONLY
@@ -637,6 +724,48 @@
 proto::terminal<detail::attribute_placeholder<mpl::int_<8> > >::type const a8 = {{}};
 proto::terminal<detail::attribute_placeholder<mpl::int_<9> > >::type const a9 = {{}};
 
+///////////////////////////////////////////////////////////////////////////////
+/// \brief Specify which characters to skip when matching a regex.
+///
+/// <tt>skip()</tt> instructs the regex engine to skip certain characters when matching
+/// a regex. It is most useful for writing regexes that ignore whitespace.
+/// For instance, the following specifies a regex that skips whitespace and
+/// punctuation:
+///
+/// \code
+/// // A sentence is one or more words separated by whitespace
+/// // and punctuation.
+/// sregex word = +alpha;
+/// sregex sentence = skip(set[_s | punct])( +word );
+/// \endcode
+///
+/// The way it works in the above example is to insert
+/// <tt>keep(*set[_s | punct])</tt> before each primitive within the regex.
+/// A "primitive" includes terminals like strings, character sets and nested
+/// regexes. A final <tt>*set[_s | punct]</tt> is added to the end of the
+/// regex. The regex <tt>sentence</tt> specified above is equivalent to
+/// the following:
+///
+/// \code
+/// sregex sentence = +( keep(*set[_s | punct]) >> word )
+/// >> *set[_s | punct];
+/// \endcode
+///
+/// \attention Skipping does not affect how nested regexes are handled because
+/// they are treated atomically. String literals are also treated
+/// atomically; that is, no skipping is done within a string literal. So
+/// <tt>skip(_s)("this that")</tt> is not the same as
+/// <tt>skip(_s)("this" >> as_xpr("that"))</tt>. The first will only match
+/// when there is exactly one space between "this" and "that". The second will
+/// skip any and all whitespace between "this" and "that".
+///
+/// \param skip A regex that specifies which characters to skip.
+template<typename Skip>
+detail::skip_directive<Skip> skip(Skip const &skip)
+{
+ return detail::skip_directive<Skip>(skip);
+}
+
 namespace detail
 {
     inline void ignore_unused_regex_primitives()

Modified: trunk/libs/xpressive/test/Jamfile.v2
==============================================================================
--- trunk/libs/xpressive/test/Jamfile.v2 (original)
+++ trunk/libs/xpressive/test/Jamfile.v2 2008-01-14 15:26:58 EST (Mon, 14 Jan 2008)
@@ -57,6 +57,7 @@
          [ run test_symbols.cpp ]
          [ run test_dynamic.cpp ]
          [ run test_dynamic_grammar.cpp ]
+ [ run test_skip.cpp ]
          [ link multiple_defs1.cpp multiple_defs2.cpp : : multiple_defs ]
          [ compile test_basic_regex.cpp ]
          [ compile test_match_results.cpp ]

Added: trunk/libs/xpressive/test/test_skip.cpp
==============================================================================
--- (empty file)
+++ trunk/libs/xpressive/test/test_skip.cpp 2008-01-14 15:26:58 EST (Mon, 14 Jan 2008)
@@ -0,0 +1,96 @@
+///////////////////////////////////////////////////////////////////////////////
+// test_skip.cpp
+//
+// Copyright 2004 Eric Niebler. Distributed under the Boost
+// Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <map>
+#include <iostream>
+#include <boost/xpressive/xpressive.hpp>
+#include <boost/xpressive/regex_actions.hpp>
+#include <boost/test/unit_test.hpp>
+
+using namespace boost::unit_test;
+using namespace boost::xpressive;
+
+void test1()
+{
+ std::string s = "a a b b c c";
+
+ sregex rx =
+ "a a" >>
+ skip(_s)
+ (
+ (s1= as_xpr('b')) >>
+ as_xpr('b') >>
+ *as_xpr('c') // causes backtracking
+ ) >>
+ "c c";
+
+ smatch what;
+ BOOST_CHECK( regex_match(s, what, rx) );
+
+ s = "123,456,789";
+ sregex rx2 = skip(',')(+_d);
+ BOOST_CHECK( regex_match(s, what, rx2) );
+
+ s = "foo";
+ sregex rx3 = skip(_s)(after("fo") >> 'o');
+ BOOST_CHECK( regex_search(s, what, rx3) );
+}
+
+template<typename Expr>
+void test_skip_aux(Expr const &expr)
+{
+ sregex rx = skip(_s)(expr);
+}
+
+void test_skip()
+{
+ int i=0;
+ std::map<std::string, int> syms;
+ std::locale loc;
+
+ test_skip_aux( 'a' );
+ test_skip_aux( _ );
+ test_skip_aux( +_ );
+ test_skip_aux( -+_ );
+ test_skip_aux( !_ );
+ test_skip_aux( -!_ );
+ test_skip_aux( repeat<0,42>(_) );
+ test_skip_aux( -repeat<0,42>(_) );
+ test_skip_aux( _ >> 'a' );
+ test_skip_aux( _ >> 'a' | _ );
+ test_skip_aux( _ >> 'a' | _ >> 'b' );
+ test_skip_aux( s1= _ >> 'a' | _ >> 'b' );
+ test_skip_aux( icase(_ >> 'a' | _ >> 'b') );
+ test_skip_aux( imbue(loc)(_ >> 'a' | _ >> 'b') );
+ test_skip_aux( (set='a') );
+ test_skip_aux( (set='a','b') );
+ test_skip_aux( ~(set='a') );
+ test_skip_aux( ~(set='a','b') );
+ test_skip_aux( range('a','b') );
+ test_skip_aux( ~range('a','b') );
+ test_skip_aux( set['a' | alpha] );
+ test_skip_aux( ~set['a' | alpha] );
+ test_skip_aux( before(_) );
+ test_skip_aux( ~before(_) );
+ test_skip_aux( after(_) );
+ test_skip_aux( ~after(_) );
+ test_skip_aux( keep(*_) );
+ test_skip_aux( (*_)[ref(i) = as<int>(_) + 1] );
+ test_skip_aux( (a1= syms)[ref(i) = a1 + 1] );
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// init_unit_test_suite
+//
+test_suite* init_unit_test_suite( int argc, char* argv[] )
+{
+ test_suite *test = BOOST_TEST_SUITE("test skip()");
+
+ test->add(BOOST_TEST_CASE(&test1));
+
+ return test;
+}


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk