Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r58466 - trunk/boost/regex/v4
From: john_at_[hidden]
Date: 2009-12-20 07:56:36


Author: johnmaddock
Date: 2009-12-20 07:56:35 EST (Sun, 20 Dec 2009)
New Revision: 58466
URL: http://svn.boost.org/trac/boost/changeset/58466

Log:
Improve recursion branch-prediction.
Text files modified:
   trunk/boost/regex/v4/basic_regex_creator.hpp | 78 ++++++++++++++++++++++++++++++++++++++-
   1 files changed, 76 insertions(+), 2 deletions(-)

Modified: trunk/boost/regex/v4/basic_regex_creator.hpp
==============================================================================
--- trunk/boost/regex/v4/basic_regex_creator.hpp (original)
+++ trunk/boost/regex/v4/basic_regex_creator.hpp 2009-12-20 07:56:35 EST (Sun, 20 Dec 2009)
@@ -677,6 +677,8 @@
 template <class charT, class traits>
 void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
 {
+ if(this->m_pdata->m_status)
+ return;
    // we've added all the states we need, now finish things off.
    // start by adding a terminating state:
    append_state(syntax_element_match);
@@ -698,6 +700,8 @@
    {
       m_pdata->m_has_recursions = true;
       fixup_recursions(m_pdata->m_first_state);
+ if(this->m_pdata->m_status)
+ return;
    }
    else
       m_pdata->m_has_recursions = false;
@@ -1012,6 +1016,9 @@
 void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
 {
    int not_last_jump = 1;
+ re_syntax_base* recursion_start = 0;
+ int recursion_sub = 0;
+ re_syntax_base* recursion_restart = 0;
 
    // track case sensitivity:
    bool l_icase = m_icase;
@@ -1057,6 +1064,40 @@
          return;
       }
       case syntax_element_recurse:
+ {
+ if(recursion_start == state)
+ {
+ // Infinite recursion!!
+ if(0 == this->m_pdata->m_status) // update the error code if not already set
+ this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
+ //
+ // clear the expression, we should be empty:
+ //
+ this->m_pdata->m_expression = 0;
+ this->m_pdata->m_expression_len = 0;
+ //
+ // and throw if required:
+ //
+ if(0 == (this->flags() & regex_constants::no_except))
+ {
+ std::string message = "Encountered an infinite recursion.";
+ boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
+ e.raise();
+ }
+ }
+ else if(recursion_start == 0)
+ {
+ recursion_start = state;
+ recursion_restart = state->next.p;
+ state = static_cast<re_jump*>(state)->alt.p;
+ if(state->type == syntax_element_startmark)
+ recursion_sub = static_cast<re_brace*>(state)->index;
+ else
+ recursion_sub = 0;
+ break;
+ }
+ // fall through, can't handle nested recursion here...
+ }
       case syntax_element_backref:
          // can be null, and any character can match:
          if(pnull)
@@ -1215,12 +1256,45 @@
                *pnull |= mask;
             return;
          }
- else
+ else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast<re_brace*>(state)->index))
          {
- state = state->next.p;
+ // recursion termination:
+ recursion_start = 0;
+ state = recursion_restart;
             break;
          }
 
+ //
+ // Normally we just go to the next state... but if this sub-expression is
+ // the target of a recursion, then we might be ending a recursion, in which
+ // case we should check whatever follows that recursion, as well as whatever
+ // follows this state:
+ //
+ if(m_pdata->m_has_recursions && static_cast<re_brace*>(state)->index)
+ {
+ bool ok = false;
+ re_syntax_base* p = m_pdata->m_first_state;
+ while(p)
+ {
+ if((p->type == syntax_element_recurse))
+ {
+ re_brace* p2 = static_cast<re_brace*>(static_cast<re_jump*>(p)->alt.p);
+ if((p2->type == syntax_element_startmark) && (p2->index == static_cast<re_brace*>(state)->index))
+ {
+ ok = true;
+ break;
+ }
+ }
+ p = p->next.p;
+ }
+ if(ok)
+ {
+ create_startmap(p->next.p, l_map, pnull, mask);
+ }
+ }
+ state = state->next.p;
+ break;
+
       case syntax_element_startmark:
          // need to handle independent subs as a special case:
          if(static_cast<re_brace*>(state)->index == -3)


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk