Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r59414 - in trunk: boost/regex/v4 libs/regex/performance
From: john_at_[hidden]
Date: 2010-02-01 08:10:29


Author: johnmaddock
Date: 2010-02-01 08:10:28 EST (Mon, 01 Feb 2010)
New Revision: 59414
URL: http://svn.boost.org/trac/boost/changeset/59414

Log:
Improve regex performance on msvc by removing statically allocated recursion stack, and using a std::vector instead.
Text files modified:
   trunk/boost/regex/v4/perl_matcher.hpp | 6 ++--
   trunk/boost/regex/v4/perl_matcher_common.hpp | 2
   trunk/boost/regex/v4/perl_matcher_non_recursive.hpp | 50 ++++++++++++++++++++-----------------
   trunk/boost/regex/v4/perl_matcher_recursive.hpp | 53 +++++++++++++++++++++------------------
   trunk/libs/regex/performance/command_line.cpp | 3 ++
   5 files changed, 63 insertions(+), 51 deletions(-)

Modified: trunk/boost/regex/v4/perl_matcher.hpp
==============================================================================
--- trunk/boost/regex/v4/perl_matcher.hpp (original)
+++ trunk/boost/regex/v4/perl_matcher.hpp 2010-02-01 08:10:28 EST (Mon, 01 Feb 2010)
@@ -366,7 +366,7 @@
       BidiIterator l_base)
       : m_result(what), base(first), last(end),
          position(first), backstop(l_base), re(e), traits_inst(e.get_traits()),
- m_independent(false), next_count(&rep_obj), rep_obj(&next_count), recursion_stack_position(0)
+ m_independent(false), next_count(&rep_obj), rep_obj(&next_count)/*, recursion_stack_position(0)*/
    {
       construct_init(e, f);
    }
@@ -488,8 +488,8 @@
    // the bitmask to use when determining whether a match_any matches a newline or not:
    unsigned char match_any_mask;
    // recursion information:
- recursion_info<results_type> recursion_stack[50];
- unsigned recursion_stack_position;
+ std::vector<recursion_info<results_type> > recursion_stack;
+ //unsigned recursion_stack_position;
 
 #ifdef BOOST_REGEX_NON_RECURSIVE
    //

Modified: trunk/boost/regex/v4/perl_matcher_common.hpp
==============================================================================
--- trunk/boost/regex/v4/perl_matcher_common.hpp (original)
+++ trunk/boost/regex/v4/perl_matcher_common.hpp 2010-02-01 08:10:28 EST (Mon, 01 Feb 2010)
@@ -735,7 +735,7 @@
       int id = -index-1;
       if(id >= 10000)
          id = re.get_data().get_id(id);
- result = recursion_stack_position && ((recursion_stack[recursion_stack_position-1].id == id) || (index == 0));
+ result = !recursion_stack.empty() && ((recursion_stack.back().id == id) || (index == 0));
       pstate = pstate->next.p;
    }
    return result;

Modified: trunk/boost/regex/v4/perl_matcher_non_recursive.hpp
==============================================================================
--- trunk/boost/regex/v4/perl_matcher_non_recursive.hpp (original)
+++ trunk/boost/regex/v4/perl_matcher_non_recursive.hpp 2010-02-01 08:10:28 EST (Mon, 01 Feb 2010)
@@ -898,19 +898,20 @@
    //
    // Set new call stack:
    //
- if(recursion_stack_position >= static_cast<int>(sizeof(recursion_stack)/sizeof(recursion_stack[0])))
+ if(recursion_stack.capacity() == 0)
    {
- return false;
+ recursion_stack.reserve(50);
    }
- recursion_stack[recursion_stack_position].preturn_address = pstate->next.p;
- recursion_stack[recursion_stack_position].results = *m_presult;
+ recursion_stack.push_back(recursion_info<results_type>());
+ recursion_stack.back().preturn_address = pstate->next.p;
+ recursion_stack.back().results = *m_presult;
    if(static_cast<const re_recurse*>(pstate)->state_id > 0)
    {
       push_repeater_count(static_cast<const re_recurse*>(pstate)->state_id, &next_count);
    }
    pstate = static_cast<const re_jump*>(pstate)->alt.p;
- recursion_stack[recursion_stack_position].id = static_cast<const re_brace*>(pstate)->index;
- ++recursion_stack_position;
+ recursion_stack.back().id = static_cast<const re_brace*>(pstate)->index;
+ //++recursion_stack_position;
    //BOOST_ASSERT(recursion_stack[recursion_stack_position-1].id);
 
    return true;
@@ -927,14 +928,15 @@
       {
          m_presult->set_second(position, index);
       }
- if(recursion_stack_position)
+ if(!recursion_stack.empty())
       {
- if(index == recursion_stack[recursion_stack_position-1].id)
+ if(index == recursion_stack.back().id)
          {
- --recursion_stack_position;
- pstate = recursion_stack[recursion_stack_position].preturn_address;
- *m_presult = recursion_stack[recursion_stack_position].results;
- push_recursion(recursion_stack[recursion_stack_position].id, recursion_stack[recursion_stack_position].preturn_address, &recursion_stack[recursion_stack_position].results);
+ //--recursion_stack_position;
+ pstate = recursion_stack.back().preturn_address;
+ *m_presult = recursion_stack.back().results;
+ push_recursion(recursion_stack.back().id, recursion_stack.back().preturn_address, &recursion_stack.back().results);
+ recursion_stack.pop_back();
          }
       }
    }
@@ -951,13 +953,14 @@
 template <class BidiIterator, class Allocator, class traits>
 bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
 {
- if(recursion_stack_position)
+ if(!recursion_stack.empty())
    {
- BOOST_ASSERT(0 == recursion_stack[recursion_stack_position-1].id);
- --recursion_stack_position;
- pstate = recursion_stack[recursion_stack_position].preturn_address;
- *m_presult = recursion_stack[recursion_stack_position].results;
- push_recursion(recursion_stack[recursion_stack_position].id, recursion_stack[recursion_stack_position].preturn_address, &recursion_stack[recursion_stack_position].results);
+ BOOST_ASSERT(0 == recursion_stack.back().id);
+ //--recursion_stack_position;
+ pstate = recursion_stack.back().preturn_address;
+ *m_presult = recursion_stack.back().results;
+ push_recursion(recursion_stack.back().id, recursion_stack.back().preturn_address, &recursion_stack.back().results);
+ recursion_stack.pop_back();
       return true;
    }
    if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first))
@@ -1523,10 +1526,11 @@
    saved_recursion<results_type>* pmp = static_cast<saved_recursion<results_type>*>(m_backup_state);
    if(!r)
    {
- recursion_stack[recursion_stack_position].id = pmp->recursion_id;
- recursion_stack[recursion_stack_position].preturn_address = pmp->preturn_address;
- recursion_stack[recursion_stack_position].results = pmp->results;
- ++recursion_stack_position;
+ recursion_stack.push_back(recursion_info<results_type>());
+ recursion_stack.back().id = pmp->recursion_id;
+ recursion_stack.back().preturn_address = pmp->preturn_address;
+ recursion_stack.back().results = pmp->results;
+ //++recursion_stack_position;
    }
    boost::re_detail::inplace_destroy(pmp++);
    m_backup_state = pmp;
@@ -1539,7 +1543,7 @@
    saved_state* pmp = static_cast<saved_state*>(m_backup_state);
    if(!r)
    {
- --recursion_stack_position;
+ recursion_stack.pop_back();
    }
    boost::re_detail::inplace_destroy(pmp++);
    m_backup_state = pmp;

Modified: trunk/boost/regex/v4/perl_matcher_recursive.hpp
==============================================================================
--- trunk/boost/regex/v4/perl_matcher_recursive.hpp (original)
+++ trunk/boost/regex/v4/perl_matcher_recursive.hpp 2010-02-01 08:10:28 EST (Mon, 01 Feb 2010)
@@ -857,16 +857,17 @@
    //
    // Set new call stack:
    //
- if(recursion_stack_position >= static_cast<int>(sizeof(recursion_stack)/sizeof(recursion_stack[0])))
+ if(recursion_stack.capacity() == 0)
    {
- return false;
+ recursion_stack.reserve(50);
    }
- recursion_stack[recursion_stack_position].preturn_address = pstate->next.p;
- recursion_stack[recursion_stack_position].results = *m_presult;
- recursion_stack[recursion_stack_position].repeater_stack = next_count;
+ recursion_stack.push_back(recursion_info<results_type>());
+ recursion_stack.back().preturn_address = pstate->next.p;
+ recursion_stack.back().results = *m_presult;
+ recursion_stack.back().repeater_stack = next_count;
    pstate = static_cast<const re_jump*>(pstate)->alt.p;
- recursion_stack[recursion_stack_position].id = static_cast<const re_brace*>(pstate)->index;
- ++recursion_stack_position;
+ recursion_stack.back().id = static_cast<const re_brace*>(pstate)->index;
+ //++recursion_stack_position;
 
    repeater_count<BidiIterator>* saved = next_count;
    repeater_count<BidiIterator> r(&next_count); // resets all repeat counts since we're recursing and starting fresh on those
@@ -876,9 +877,10 @@
 
    if(!result)
    {
- --recursion_stack_position;
- next_count = recursion_stack[recursion_stack_position].repeater_stack;
- *m_presult = recursion_stack[recursion_stack_position].results;
+ //--recursion_stack_position;
+ next_count = recursion_stack.back().repeater_stack;
+ *m_presult = recursion_stack.back().results;
+ recursion_stack.pop_back();
       return false;
    }
    return true;
@@ -895,20 +897,21 @@
       {
          m_presult->set_second(position, index);
       }
- if(recursion_stack_position)
+ if(!recursion_stack.empty())
       {
- if(index == recursion_stack[recursion_stack_position-1].id)
+ if(index == recursion_stack.back().id)
          {
- --recursion_stack_position;
- recursion_info<results_type> saved = recursion_stack[recursion_stack_position];
+ //--recursion_stack_position;
+ recursion_info<results_type> saved = recursion_stack.back();
+ recursion_stack.pop_back();
             const re_syntax_base* saved_state = pstate = saved.preturn_address;
             repeater_count<BidiIterator>* saved_count = next_count;
             next_count = saved.repeater_stack;
             *m_presult = saved.results;
             if(!match_all_states())
             {
- recursion_stack[recursion_stack_position] = saved;
- ++recursion_stack_position;
+ recursion_stack.push_back(saved);
+ //++recursion_stack_position;
                next_count = saved_count;
                return false;
             }
@@ -928,17 +931,19 @@
 template <class BidiIterator, class Allocator, class traits>
 bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
 {
- if(recursion_stack_position)
+ if(!recursion_stack.empty())
    {
- BOOST_ASSERT(0 == recursion_stack[recursion_stack_position-1].id);
- --recursion_stack_position;
- const re_syntax_base* saved_state = pstate = recursion_stack[recursion_stack_position].preturn_address;
- *m_presult = recursion_stack[recursion_stack_position].results;
+ BOOST_ASSERT(0 == recursion_stack.back().id);
+ //--recursion_stack_position;
+ const re_syntax_base* saved_state = pstate = recursion_stack.back().preturn_address;
+ *m_presult = recursion_stack.back().results;
+ recursion_stack.pop_back();
       if(!match_all_states())
       {
- recursion_stack[recursion_stack_position].preturn_address = saved_state;
- recursion_stack[recursion_stack_position].results = *m_presult;
- ++recursion_stack_position;
+ recursion_stack.push_back(recursion_info<results_type>());
+ recursion_stack.back().preturn_address = saved_state;
+ recursion_stack.back().results = *m_presult;
+ //++recursion_stack_position;
          return false;
       }
       return true;

Modified: trunk/libs/regex/performance/command_line.cpp
==============================================================================
--- trunk/libs/regex/performance/command_line.cpp (original)
+++ trunk/libs/regex/performance/command_line.cpp 2010-02-01 08:10:28 EST (Mon, 01 Feb 2010)
@@ -101,6 +101,9 @@
 #ifdef BOOST_HAS_PCRE
       time_pcre = true;
 #endif
+#ifdef BOOST_HAS_XPRESSIVE
+ time_xpressive = true;
+#endif
    }
    else if(what == "-test-matches")
       test_matches = true;


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk