Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r58722 - in trunk: boost/regex/v4 libs/regex/test/regress
From: john_at_[hidden]
Date: 2010-01-05 13:04:10


Author: johnmaddock
Date: 2010-01-05 13:04:08 EST (Tue, 05 Jan 2010)
New Revision: 58722
URL: http://svn.boost.org/trac/boost/changeset/58722

Log:
Fix bug that affects recursive expressions combined with repeats.
Text files modified:
   trunk/boost/regex/v4/basic_regex_creator.hpp | 35 +++++++++++++++++++++++++++++++++++
   trunk/boost/regex/v4/basic_regex_parser.hpp | 4 +++-
   trunk/boost/regex/v4/perl_matcher.hpp | 11 ++++++++---
   trunk/boost/regex/v4/perl_matcher_non_recursive.hpp | 5 +++++
   trunk/boost/regex/v4/states.hpp | 8 ++++++++
   trunk/libs/regex/test/regress/test_perl_ex.cpp | 4 ++++
   6 files changed, 63 insertions(+), 4 deletions(-)

Modified: trunk/boost/regex/v4/basic_regex_creator.hpp
==============================================================================
--- trunk/boost/regex/v4/basic_regex_creator.hpp (original)
+++ trunk/boost/regex/v4/basic_regex_creator.hpp 2010-01-05 13:04:08 EST (Tue, 05 Jan 2010)
@@ -811,8 +811,43 @@
             {
                if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == id))
                {
+ //
+ // We've found the target of the recursion, set the jump target:
+ //
                   static_cast<re_jump*>(state)->alt.p = p;
                   ok = true;
+ //
+ // Now scan the target for nested repeats:
+ //
+ p = p->next.p;
+ int next_rep_id = 0;
+ while(p)
+ {
+ switch(p->type)
+ {
+ case syntax_element_rep:
+ case syntax_element_dot_rep:
+ case syntax_element_char_rep:
+ case syntax_element_short_set_rep:
+ case syntax_element_long_set_rep:
+ next_rep_id = static_cast<re_repeat*>(p)->state_id;
+ break;
+ case syntax_element_endmark:
+ if(static_cast<const re_brace*>(p)->index == id)
+ next_rep_id = -1;
+ break;
+ default:
+ break;
+ }
+ if(next_rep_id)
+ break;
+ p = p->next.p;
+ }
+ if(next_rep_id > 0)
+ {
+ static_cast<re_recurse*>(state)->state_id = next_rep_id - 1;
+ }
+
                   break;
                }
                p = p->next.p;

Modified: trunk/boost/regex/v4/basic_regex_parser.hpp
==============================================================================
--- trunk/boost/regex/v4/basic_regex_parser.hpp (original)
+++ trunk/boost/regex/v4/basic_regex_parser.hpp 2010-01-05 13:04:08 EST (Tue, 05 Jan 2010)
@@ -1939,7 +1939,9 @@
       }
 insert_recursion:
       pb->index = markid = 0;
- static_cast<re_jump*>(this->append_state(syntax_element_recurse, sizeof(re_jump)))->alt.i = v;
+ re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
+ pr->alt.i = v;
+ pr->state_id = 0;
       static_cast<re_case*>(
             this->append_state(syntax_element_toggle_case, sizeof(re_case))
             )->icase = this->flags() & regbase::icase;

Modified: trunk/boost/regex/v4/perl_matcher.hpp
==============================================================================
--- trunk/boost/regex/v4/perl_matcher.hpp (original)
+++ trunk/boost/regex/v4/perl_matcher.hpp 2010-01-05 13:04:08 EST (Tue, 05 Jan 2010)
@@ -277,10 +277,15 @@
       else
       {
          repeater_count* p = next;
- while(p->state_id != state_id)
+ while(p && (p->state_id != state_id))
             p = p->next;
- count = p->count;
- start_pos = p->start_pos;
+ if(p)
+ {
+ count = p->count;
+ start_pos = p->start_pos;
+ }
+ else
+ count = 0;
       }
    }
    ~repeater_count()

Modified: trunk/boost/regex/v4/perl_matcher_non_recursive.hpp
==============================================================================
--- trunk/boost/regex/v4/perl_matcher_non_recursive.hpp (original)
+++ trunk/boost/regex/v4/perl_matcher_non_recursive.hpp 2010-01-05 13:04:08 EST (Tue, 05 Jan 2010)
@@ -904,10 +904,15 @@
    }
    recursion_stack[recursion_stack_position].preturn_address = pstate->next.p;
    recursion_stack[recursion_stack_position].results = *m_presult;
+ if(static_cast<const re_recurse*>(pstate)->state_id > 0)
+ {
+ push_repeater_count(static_cast<const re_recurse*>(pstate)->state_id, &next_count);
+ }
    pstate = static_cast<const re_jump*>(pstate)->alt.p;
    recursion_stack[recursion_stack_position].id = static_cast<const re_brace*>(pstate)->index;
    ++recursion_stack_position;
    //BOOST_ASSERT(recursion_stack[recursion_stack_position-1].id);
+
    return true;
 }
 

Modified: trunk/boost/regex/v4/states.hpp
==============================================================================
--- trunk/boost/regex/v4/states.hpp (original)
+++ trunk/boost/regex/v4/states.hpp 2010-01-05 13:04:08 EST (Tue, 05 Jan 2010)
@@ -248,6 +248,14 @@
    bool greedy; // True if this is a greedy repeat
 };
 
+/*** struct re_recurse ************************************************
+Recurse to a particular subexpression.
+**********************************************************************/
+struct re_recurse : public re_jump
+{
+ int state_id; // identifier of first nested repeat within the recursion.
+};
+
 /*** enum re_jump_size_type *******************************************
 Provides compiled size of re_jump structure (allowing for trailing alignment).
 We provide this so we know how manybytes to insert when constructing the machine

Modified: trunk/libs/regex/test/regress/test_perl_ex.cpp
==============================================================================
--- trunk/libs/regex/test/regress/test_perl_ex.cpp (original)
+++ trunk/libs/regex/test/regress/test_perl_ex.cpp 2010-01-05 13:04:08 EST (Tue, 05 Jan 2010)
@@ -892,5 +892,9 @@
    TEST_REGEX_SEARCH("\\b(?&byte)(\\.(?&byte)){3}(?(DEFINE)(?<byte>2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))", perl|mod_x, "10.0.0.0", match_default, make_array(0, 8, 6, 8, -1, -1, -2, -2));
    TEST_REGEX_SEARCH("\\b(?&byte)(\\.(?&byte)){3}(?(DEFINE)(?<byte>2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))", perl|mod_x, "10.6", match_default, make_array(-2, -2));
    TEST_REGEX_SEARCH("\\b(?&byte)(\\.(?&byte)){3}(?(DEFINE)(?<byte>2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))", perl|mod_x, "455.3.4.5", match_default, make_array(-2, -2));
+
+ // Bugs:
+ TEST_REGEX_SEARCH("namespace\\s+(\\w+)\\s+(\\{(?:[^{}]*(?:(?2)[^{}]*)*)?\\})", perl, "namespace one { namespace two { int foo(); } }", match_default, make_array(0, 46, 10, 13, 14, 46, -2, -2));
+ TEST_REGEX_SEARCH("namespace\\s+(\\w+)\\s+(\\{(?:[^{}]*(?:(?2)[^{}]*)*)?\\})", perl, "namespace one { namespace two { int foo(){} } { {{{ } } } } {}}", match_default, make_array(0, 64, 10, 13, 14, 64, -2, -2));
 }
 


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk