Boost logo

Boost :

From: Eric Niebler (eric_at_[hidden])
Date: 2007-11-29 01:42:49


Jorge Lodos Vigil wrote:
> Hi
> We are using xpressive with a grammar to match certain patterns. In some cases, we have the need to ignore white spaces.
> Using dynamic regexes, this can be achieved with the ignore_white_space constant.
> Is there a way to ignore white spaces using a grammar in xpressive other than modifying the grammar itself?
> We know spirit is an option, but we need to evaluate parsing speed with as many methods as possible.
> Thanks in advance.

I have been experimenting with a skip() directive for xpressive that
lets you skip whitespace in the input while matching a pattern. It does
require modifying the grammar, but only in one place:

   sregex rx = skip(_s)(alpha >> +_d);

This is equivalent to:

   sregex rx = keep(*_s) >> alpha >> +(keep(*_s) >> _d) >> *_s;

You can use any valid sub-expression as a skipper. Let me know if you
find something like this useful. I'm attaching the code. It is for use
with xpressive 2.0, which you can find in subversion or the file vault
(http://tinyurl.com/8fean).

-- 
Eric Niebler
Boost Consulting
www.boost-consulting.com

///////////////////////////////////////////////////////////////////////////////
// main.hpp
//
// Copyright 2007 Eric Niebler. Distributed under the Boost
// Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <boost/xpressive/xpressive.hpp>

namespace boost { namespace xpressive
{
    namespace detail
    {
        using proto::_;

        // replace "Expr" with "keep(*State) >> Expr"
        //
        // Inherits from Grammar and adds the apply<>/call() pair that builds
        // the replacement expression. State is the skipper expression;
        // Visitor is accepted but never used by this transform.
        // NOTE(review): inheriting from Grammar presumably makes this type
        // match exactly what Grammar matches -- confirm against proto docs.
        template<typename Grammar>
        struct skip_primitives
          : Grammar
        {
            // Computes the result type: a shift_right (>>) node whose left
            // operand is a keeper_tag unary node wrapping dereference<State>
            // (i.e. keep(*State)) and whose right operand is Expr itself.
            template<typename Expr, typename State, typename Visitor>
            struct apply
              : proto::shift_right<
                    typename proto::unary_expr<
                        keeper_tag
                      , typename proto::dereference<State>::type
>::type
                  , Expr
>
            {};

            // Builds and returns the expression whose type apply<> computes.
            //   expr    - the matched primitive, stored as the right operand
            //   state   - the skipper, stored innermost on the left
            //   visitor - unused
            template<typename Expr, typename State, typename Visitor>
            static typename apply<Expr, State, Visitor>::type
            call(Expr const &expr, State const &state, Visitor &visitor)
            {
                typedef typename apply<Expr, State, Visitor>::type type;
                // Brace nesting mirrors the node nesting of `type`:
                // outer braces = the >> node, {{state}} = keeper node holding
                // the dereference node holding state, then expr as the right
                // operand. Assumes proto nodes are brace-initializable
                // aggregates -- TODO confirm.
                type that = {{{state}}, expr};
                return that;
            }
        };

        // Grammar describing the expressions that skip_primitives<> treats
        // as a single unit to be prefixed with the skipper. An expression
        // matches if it is any one of the alternatives listed below; the
        // grammar is recursive through the complement<> alternative.
        struct Primitives
          : proto::or_<
                // any terminal
                proto::terminal<_>
                // any comma expression
              , proto::comma<_, _>
                // subscript whose left operand is a set_initializer terminal
                // (presumably the set[...] syntax -- verify)
              , proto::subscript<proto::terminal<set_initializer>, _>
                // assignment whose left operand is a set_initializer terminal
                // (presumably the (set= ...) syntax -- verify)
              , proto::assign<proto::terminal<set_initializer>, _>
                // assignment whose left operand is an attribute_placeholder
                // terminal
              , proto::assign<proto::terminal<attribute_placeholder<_> >, _>
                // complement (~) of anything that is itself a Primitive
              , proto::complement<Primitives>
>
        {};

        // Recursive grammar used by skip_directive to rewrite a whole
        // expression tree. The first alternative applies skip_primitives<>
        // to anything matching Primitives; the remaining alternatives
        // recurse into sub-expressions as described by the per-line notes.
        // NOTE(review): proto::or_ presumably tries alternatives in the
        // order listed, so the ordering here is significant -- confirm.
        struct SkipGrammar
          : proto::or_<
                skip_primitives<Primitives>
              , proto::assign<proto::terminal<mark_placeholder>, SkipGrammar> // don't "skip" mark tags
              , proto::subscript<SkipGrammar, _> // don't put skips in actions
              , proto::binary_expr<modifier_tag, _, SkipGrammar> // don't skip modifiers
              , proto::nary_expr<_, proto::vararg<SkipGrammar> > // everything else is fair game!
>
        {};

        // Function object returned by skip(). It stores the skipper as a
        // proto expression and, when invoked with a pattern expression,
        // returns "SkipGrammar-transformed pattern >> *skipper".
        template<typename Skip>
        struct skip_directive
        {
            // The skipper after conversion to a proto expression.
            typedef typename proto::result_of::as_expr<Skip>::type skip_type;

            // Stores a proto-expression copy of the given skipper.
            skip_directive(Skip const &skip)
              : skip_(proto::as_expr(skip))
            {}

            // Return-type computation keyed on a call signature; only the
            // result<This(Expr)> form below is defined.
            template<typename Sig>
            struct result;

            // Result of invoking the directive on Expr: a shift_right (>>)
            // node whose left operand is the type SkipGrammar::apply<>
            // yields for as_expr<Expr> (with skip_type as state and
            // mpl::void_ as visitor) and whose right operand is
            // dereference<skip_type>, i.e. the trailing *skipper.
            template<typename This, typename Expr>
            struct result<This(Expr)>
              : proto::shift_right<
                    typename SkipGrammar::apply<
                        typename proto::result_of::as_expr<Expr>::type
                      , skip_type
                      , mpl::void_
>::type
                  , typename proto::dereference<skip_type>::type
>
            {};

            // Applies the directive to a pattern expression.
            //   expr - the pattern to rewrite; converted with proto::as_expr
            // Returns the rewritten pattern followed by *skipper.
            template<typename Expr>
            typename result<skip_directive(Expr)>::type
            operator ()(Expr const &expr) const
            {
                // Placeholder visitor; SkipGrammar::call requires an lvalue.
                mpl::void_ ignore;
                typedef typename result<skip_directive(Expr)>::type result_type;
                // Left operand: the transformed pattern. Right operand:
                // {skip_} initializes the dereference node around the
                // stored skipper.
                result_type result = {SkipGrammar::call(proto::as_expr(expr), this->skip_, ignore), {skip_}};
                return result;
            }

        private:
            // The skipper expression captured at construction.
            skip_type skip_;
        };

    }

    // skip
    template<typename Skip>
    detail::skip_directive<Skip> skip(Skip const &skip)
    {
        return detail::skip_directive<Skip>(skip);
    }

}}

using namespace boost::xpressive;

int main()
{
    std::string s = "a a b b c c";

    sregex rx =
        "a a" >>
        skip(_s)
        (
             (s1= as_xpr('b')) >>
             as_xpr('b') >>
            *as_xpr('c') // causes backtracking
        ) >>
        "c c";

    smatch what;
    bool ok = regex_match(s, what, rx);
    std::cout << (ok ? "found" : "not found") << '\n';

    s = "123,456,789";
    sregex rx2 = skip(',')(+_d);
    ok = regex_match(s, what, rx2);
    std::cout << (ok ? "found" : "not found") << '\n';

    return 0;
}

//#include <map>
//#include <boost/xpressive/regex_actions.hpp>
//
//template<typename Expr>
//void test_skip_aux(Expr const &expr)
//{
// sregex rx = skip(_s)(expr);
//}
//
//void test_skip()
//{
// int i=0;
// std::map<std::string, int> syms;
// std::locale loc;
//
// test_skip_aux( 'a' );
// test_skip_aux( _ );
// test_skip_aux( +_ );
// test_skip_aux( -+_ );
// test_skip_aux( !_ );
// test_skip_aux( -!_ );
// test_skip_aux( repeat<0,42>(_) );
// test_skip_aux( -repeat<0,42>(_) );
// test_skip_aux( _ >> 'a' );
// test_skip_aux( _ >> 'a' | _ );
// test_skip_aux( _ >> 'a' | _ >> 'b' );
// test_skip_aux( s1= _ >> 'a' | _ >> 'b' );
// test_skip_aux( icase(_ >> 'a' | _ >> 'b') );
// test_skip_aux( imbue(loc)(_ >> 'a' | _ >> 'b') );
// test_skip_aux( (set='a') );
// test_skip_aux( (set='a','b') );
// test_skip_aux( ~(set='a') );
// test_skip_aux( ~(set='a','b') );
// test_skip_aux( range('a','b') );
// test_skip_aux( ~range('a','b') );
// test_skip_aux( set['a' | alpha] );
// test_skip_aux( ~set['a' | alpha] );
// test_skip_aux( before(_) );
// test_skip_aux( ~before(_) );
// test_skip_aux( after(_) );
// test_skip_aux( ~after(_) );
// test_skip_aux( keep(*_) );
// test_skip_aux( (*_)[ref(i) = as<int>(_) + 1] );
// test_skip_aux( (a1= syms)[ref(i) = a1 + 1] );
//}


Boost list run by bdawes at acm.org, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk