Boost logo

Boost Users :

Subject: Re: [Boost-users] Pattern matching with boost
From: Anthony Foiani (tkil_at_[hidden])
Date: 2011-11-09 19:49:21


Alec Taylor <alec.taylor6_at_[hidden]> writes:

> I'm trying to parse a relatively simple pattern across 4 std::strings,
> extracting whatever the part which matches the pattern into a separate
> std::string.
>
> In an abstracted sense, here is what I want:
> s1=<string1><consecutive number>, s2=<consecutive number><string2>,
> s3=<string1><consecutive number>, s4=<consecutive number><string2>
>
> Less abstracted:
> s1="apple 1", s2="2 cheese", s3="apple 3", s4="4 cheese"

Just to be clear, what do you want in the result string? Without
losing data, I believe you could just output:

  first_num string1 string2

> How would I perform this pattern matching?

I'd suggest splitting the responsibility between regex and your own
code. This lets you use regex for what it's good at, and then use
procedural code for what *it* is good at.

(I ran into this all the time in Perl: people would try to do
*everything* in the regex, when it was often much cleaner to do some
of the work in procedural code outside the RE.)

Anyway, find my attempt below.

Best Regards,
Tony

----------------------------------------------------------------------

// compile line:
// g++ -o substr-regex substr-regex.cpp -lboost_regex

#include <iostream>
#include <string>

#include <boost/lexical_cast.hpp>
#include <boost/regex.hpp>
#include <boost/test/minimal.hpp>

namespace
{

bool
do_one_match( const std::string & in,
              const boost::regex & re,
              std::string & m1,
              std::string & m2 )
{
    boost::smatch matches;

    if ( regex_match( in, matches, re ) )
    {
        m1 = matches[1];
        m2 = matches[2];
        return true;
    }
    else
    {
        return false;
    }
}

// Pattern fragments that the two full-line expressions are assembled from.
const std::string num_pat = "(\\d+)";  // capture group: one or more digits
const std::string str_pat = "(.+?)";   // capture group: lazy run of one or more characters
const std::string ws_pat = "\\s+";     // separator: one or more whitespace characters

// "<number> <string>" and "<string> <number>" respectively.
const boost::regex num_str_re = boost::regex( num_pat + ws_pat + str_pat );
const boost::regex str_num_re = boost::regex( str_pat + ws_pat + num_pat );

bool
do_match( const std::string & s1,
          const std::string & s2,
          const std::string & s3,
          const std::string & s4,
          std::string & result )
{
    std::string s1_num, s1_str;
    if ( ! do_one_match( s1, str_num_re, s1_str, s1_num ) )
        return false;

    std::string s2_str, s2_num;
    if ( ! do_one_match( s2, num_str_re, s2_num, s2_str ) )
        return false;

    int s1_val( boost::lexical_cast< int >( s1_num ) );
    int s2_val( boost::lexical_cast< int >( s2_num ) );
    if ( s2_val != s1_val + 1 )
        return false;

    std::string s3_num, s3_str;
    if ( ! do_one_match( s3, str_num_re, s3_str, s3_num ) )
        return false;

    if ( s3_str != s1_str )
        return false;

    int s3_val( boost::lexical_cast< int >( s3_num ) );
    if ( s3_val != s2_val + 1 )
        return false;

    std::string s4_str, s4_num;
    if ( ! do_one_match( s4, num_str_re, s4_num, s4_str ) )
        return false;

    if ( s4_str != s2_str )
        return false;

    int s4_val( boost::lexical_cast< int >( s4_num ) );
    if ( s4_val != s3_val + 1 )
        return false;

    result = s1_num + " " + s1_str + " " + s2_str;
    return true;
}

}

int test_main( int argc, char * argv[] )
{
    std::string s1( "apple 1" );
    std::string s2( "2 cheese" );
    std::string s3( "apple 3" );
    std::string s4( "4 cheese" );

    std::string result;

    BOOST_CHECK( do_match( s1, s2, s3, s4, result ) );
    BOOST_CHECK( result == "1 apple cheese" );

    s4 = "5 cheese";
    BOOST_CHECK( ! do_match( s1, s2, s3, s4, result ) );

    s4 = "4 steak";
    BOOST_CHECK( ! do_match( s1, s2, s3, s4, result ) );

    return boost::exit_success;
}


Boost-users list run by williamkempf at hotmail.com, kalb at libertysoft.com, bjorn.karlsson at readsoft.com, gregod at cs.rpi.edu, wekempf at cox.net