Boost logo

Boost Users :

From: Ovanes Markarian (om_boost_at_[hidden])
Date: 2006-07-26 14:38:44


Hi!

I am trying to match a function body and find out whether the params or the return type have
specific class name patterns. Therefore I wrote a really huge regex... sorry if it is not very
readable, but I used defines to make as few errors as possible...

// Helper macros that assemble a regular expression out of string pieces.
//
// Two families of "non marking group" helpers exist:
//   * NMG_S*  wrap a std::string expression and glue it in with operator+;
//   * NMG*    wrap string literals and rely on adjacent-literal concatenation.
// The NMG_S expansion is an unparenthesised `+` chain on purpose: literals that
// end up next to it are merged with its first/last literal before `+` is applied.

//non marking group (std::string argument)
#define NMG_S(s) "(?:"+s+")"
#define NMG_S_0N(s) NMG_S(s) "*"
#define NMG_S_1N(s) NMG_S(s) "+"
#define NMG_S_01(s) NMG_S(s) "?"

//non marking group (string literal argument)
// The blanks around `s` are required: since C++11 the spelling "(?:"s is lexed
// as ONE token (a string literal with the user-defined suffix `s`), so the
// macro parameter would never be substituted.
#define NMG(s) "(?:" s ")"
#define NMG_0N(s) NMG(s) "*"
#define NMG_1N(s) NMG(s) "+"
#define NMG_01(s) NMG(s) "?"

#define RE_OR "|"
#define RE_SPACE "[[:space:]]"
#define RE_SPACE_01 RE_SPACE "?"
#define RE_SPACE_0N RE_SPACE "*"
#define RE_SPACE_1N RE_SPACE "+"

#define RE_START_WORD "\\<"
#define RE_END_WORD "\\>"

#define RE_ID "[[:alpha:]]" NMG_0N("[_a-zA-Z0-9]")
#define RE_CONST "const"
#define RE_VOLATILE "volatile"

#define RE_ANY "."
#define RE_ANY_0N ".*"
#define RE_ANY_1N ".+"
#define RE_ANY_01 ".?"

#define RE_PTR "\\*"
#define RE_PTR_0N RE_PTR "*"
#define RE_PTR_1N RE_PTR "+"
#define RE_PTR_01 RE_PTR "?"

#define RE_REF "&"
#define RE_REF_0N RE_REF "*"
#define RE_REF_1N RE_REF "+"
#define RE_REF_01 RE_REF "?"

#define RE_LINE_START "^"

// Alternation of the ten protocol names, each inside its own non marking group.
// Every physical line except the last must end in a backslash, otherwise the
// remaining alternatives fall out of the macro.
#define ONE_OF_PROTOCOLS \
   NMG_S(strings::Name1) RE_OR NMG_S(strings::Name2) RE_OR NMG_S(strings::Name3) RE_OR \
   NMG_S(strings::Name4) RE_OR NMG_S(strings::Name5) RE_OR NMG_S(strings::Name6) RE_OR \
   NMG_S(strings::Name7) RE_OR NMG_S(strings::Name8) RE_OR NMG_S(strings::Name9) RE_OR \
   NMG_S(strings::Name10)

// A word that either contains one of the protocol names (captured by the
// marking group) or is any other non-empty text, followed by optional whitespace.
// The two alternatives are enclosed in their own non marking group: `|` has the
// lowest precedence, so without the group it would split the whole expression
// this macro is pasted into rather than just the part between \< and \>.
#define MATCH_PROTOCOL RE_START_WORD \
                           NMG( \
                               NMG_01(RE_ANY_0N "(" ONE_OF_PROTOCOLS ")" RE_ANY_0N) RE_OR \
                               NMG_01(RE_ANY_1N) \
                           ) \
                       RE_END_WORD RE_SPACE_0N

Name1 to Name10 are static const std::string definitions within a class.

I have the following regex defined to match the params and the function return type

// Pattern for a function signature: optional leading const, a result type,
// pointer/reference/const decoration, an optional function name and a
// parenthesised parameter list. It is assembled entirely from the RE_* / NMG*
// helper macros and MATCH_PROTOCOL.
//
// NOTE(review): through MATCH_PROTOCOL -> ONE_OF_PROTOCOLS this initializer reads
// strings::Name1..Name10 while static objects are being initialised. If those
// strings are defined in another translation unit (or later in this one) they
// may not be constructed yet at this point - confirm the definition order.
const boost::regex strings::regex_fct (
        //optional leading whitespace, then an optional "const" followed by whitespace
        RE_LINE_START RE_SPACE_0N NMG_01(RE_CONST RE_SPACE_1N)

        //result type
        MATCH_PROTOCOL

        //return type can be a pointer or reference with const qualifier;
        //at least one whitespace character must follow
        NMG_01(RE_CONST RE_SPACE_0N) RE_PTR_0N RE_SPACE_0N RE_REF_01 RE_SPACE_0N NMG_01(RE_CONST)
RE_SPACE_1N

        //function name (the identifier itself is optional)
        RE_START_WORD NMG_01(RE_ID) RE_END_WORD RE_SPACE_0N

        //function params
        "\\(" //open function params brace
        //param type (can be repeated 0 to N times)
        NMG_0N(
                RE_SPACE_0N MATCH_PROTOCOL RE_SPACE_0N

                //may be a ptr or ref type
                NMG_01(RE_CONST RE_SPACE_0N) RE_PTR_0N RE_SPACE_0N RE_REF_01 RE_SPACE_0N NMG_01(RE_CONST)
RE_SPACE_1N //(?: still open

                //param name and default initializations are not important
                RE_ANY_0N

                //params can be comma separated in case of more than 1 param
                NMG_01(",")

        //close NMG_0N
        )

        //the pattern ends with the closing params brace
        "\\)"
        );

Now I have the following test string:

// Test driver excerpt: one input string holding a small function definition.
using namespace std;
string search_where_fct1("CName1_Setup const* someFct(){}");

// Set handed to find_protocols as its `log` argument.
set<string> uniques_names;
// NOTE(review): find_protocols uses boost::regex_match, which only succeeds when
// the entire input is matched; regex_fct's last element is "\\)" while this
// input ends in "{}" - confirm that a whole-string match is what is wanted.
find_protocols(search_where_fct1, strings::regex_fct, uniques_names);

// Matches `expression` against `str` with the boost::match_extra flag and, when
// the match succeeds, passes the match results to an IdentifyProtocolUsage
// functor constructed over `log`. Nothing else happens when there is no match.
void find_protocols(const std::string& str,
                    const boost::regex& expression,
                    std::set<std::string>& log)
{
        boost::smatch results;
        IdentifyProtocolUsage collector(log);

        //<<<<< crash!!!! - the access violation is reported from inside this call
        const bool matched = boost::regex_match(str, results, expression, boost::match_extra);
        if(!matched)
                return;

        collector(results);
}

This is the boost::regex function where it crashes:

// Quoted from Boost.Regex (perl_matcher_common.hpp, per the poster's note below).
// Resets the matcher state, sizes the result object, and runs match_prefix().
// Returns false when match_prefix() yields 0; otherwise returns whether the
// end of sub-match 0 coincides with `last`.
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_imp()
{
   // initialise our stack if we are non-recursive:
#ifdef BOOST_REGEX_NON_RECURSIVE
   save_state_init init(&m_stack_base, &m_backup_state);
   used_block_count = BOOST_REGEX_MAX_BLOCKS;
#if !defined(BOOST_NO_EXCEPTIONS)
   // the try block only exists in non-recursive builds with exceptions enabled;
   // its matching catch is at the bottom of the function
   try{
#endif
#endif

   // reset our state machine:
   position = base;
   search_base = base;
   state_count = 0;
   // add match_all to the caller's flags
   m_match_flags |= regex_constants::match_all;

   //LINE 162 perl_matcher_common.hpp
   // The leading "!!!" is the poster's marker for the statement at which the
   // access violation is reported; it is not part of the library source.
   // The result is sized to 1 when match_nosubs is set, else to re.mark_count().
   !!! m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last);

   m_presult->set_base(base);
   // with match_posix, m_result starts out as a copy of *m_presult
   if(m_match_flags & match_posix)
      m_result = *m_presult;
   verify_options(re.flags(), m_match_flags);
   if(0 == match_prefix())
      return false;
   // true only if sub-match 0 ends at `last`
   return m_result[0].second == last;

#if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
   }
   catch(...)
   {
      // unwind all pushed states, apart from anything else this
      // ensures that all the states are correctly destructed
      // not just the memory freed.
      while(unwind(true)){}
      // re-raise the original exception for the caller
      throw;
   }
#endif
}

The call on this line causes an access violation:
Unhandled exception at 0x104817fd in some_app.exe: 0xC0000005: Access violation writing location
0x56394ab7.

I re-built regex with BOOST_REGEX_MATCH_EXTRA uncommented in user.hpp.

It would be really nice to receive some comments or suggestions.

With Kind Regards,

Ovanes Markarian


Boost-users list run by williamkempf at hotmail.com, kalb at libertysoft.com, bjorn.karlsson at readsoft.com, gregod at cs.rpi.edu, wekempf at cox.net