Boost logo

Boost-Commit :

From: eric_at_[hidden]
Date: 2007-08-30 11:40:34


Author: eric_niebler
Date: 2007-08-30 11:40:34 EDT (Thu, 30 Aug 2007)
New Revision: 39075
URL: http://svn.boost.org/trac/boost/changeset/39075

Log:
dynamically optimizing TST, from Dave Jenkins
Text files modified:
   trunk/boost/xpressive/detail/utility/symbols.hpp | 171 +++++++++++++++++++++++++++------------
   1 files changed, 119 insertions(+), 52 deletions(-)

Modified: trunk/boost/xpressive/detail/utility/symbols.hpp
==============================================================================
--- trunk/boost/xpressive/detail/utility/symbols.hpp (original)
+++ trunk/boost/xpressive/detail/utility/symbols.hpp 2007-08-30 11:40:34 EDT (Thu, 30 Aug 2007)
@@ -21,8 +21,7 @@
 #include <boost/range/iterator.hpp>
 #include <boost/range/begin.hpp>
 #include <boost/range/end.hpp>
-#include <boost/intrusive_ptr.hpp>
-#include <boost/xpressive/detail/utility/counted_base.hpp>
+#include <boost/shared_ptr.hpp>
 
 namespace boost { namespace xpressive { namespace detail
 {
@@ -40,67 +39,85 @@
         typedef typename range_iterator<key_type const>::type key_iterator;
         typedef value_type const *result_type;
 
- symbols()
- : root(0)
- {}
-
- // copies of this symbols table share the TST
+ // copies of this symbol table share the TST
 
         template<typename Trans>
         void load(Map const &map, Trans trans)
         {
- if(0 == this->root)
- {
- iterator begin = boost::begin(map);
- iterator end = boost::end(map);
- for(; begin != end; ++begin)
- {
- key_iterator kbegin = boost::begin(begin->first);
- key_iterator kend = boost::end(begin->first);
- this->root = this->insert(this->root, kbegin, kend, &begin->second, trans);
- }
+ iterator begin = boost::begin(map);
+ iterator end = boost::end(map);
+ node* root_p = this->root.get();
+ for(; begin != end; ++begin)
+ {
+ key_iterator kbegin = boost::begin(begin->first);
+ key_iterator kend = boost::end(begin->first);
+ root_p = this->insert(root_p, kbegin, kend, &begin->second, trans);
             }
+ this->root.reset(root_p);
         }
 
         template<typename BidiIter, typename Trans>
         result_type operator ()(BidiIter &begin, BidiIter end, Trans trans) const
         {
- return this->search(begin, end, trans);
+ return this->search(begin, end, trans, this->root.get());
         }
 
         template<typename Sink>
         void peek(Sink const &sink) const
         {
- this->peek_(this->root, sink);
+ this->peek_(this->root.get(), sink);
         }
 
     private:
- struct node;
- typedef intrusive_ptr<node> node_ptr;
-
+ ///////////////////////////////////////////////////////////////////////////////
+ // struct node : a node in the TST.
+ // The "eq" field stores the result pointer when ch is zero.
+ //
         struct node
- : counted_base<node>
+ : boost::noncopyable
         {
- node()
- : counted_base<node>()
- , ch(0)
+ node(char_type c)
+ : ch(c)
               , lo(0)
               , eq(0)
               , hi(0)
- , result(0)
+ , tau(0)
             {}
 
+ ~node()
+ {
+ delete lo;
+ if (ch)
+ delete eq;
+ delete hi;
+ }
+
+ void swap(node& that)
+ {
+ std::swap(ch, that.ch);
+ std::swap(lo, that.lo);
+ std::swap(eq, that.eq);
+ std::swap(hi, that.hi);
+ std::swap(tau, that.tau);
+ }
+
             char_type ch;
- node_ptr lo;
- node_ptr eq;
- node_ptr hi;
- result_type result;
+ node* lo;
+ union
+ {
+ node* eq;
+ result_type result;
+ };
+ node* hi;
+ long tau;
         };
 
+ ///////////////////////////////////////////////////////////////////////////////
+ // insert : insert a string into the TST
+ //
         template<typename Trans>
- node_ptr insert(node_ptr const &pp, key_iterator &begin, key_iterator end, result_type r, Trans trans) const
+ node* insert(node* p, key_iterator &begin, key_iterator end, result_type r, Trans trans) const
         {
- node_ptr p = pp;
             char_type c1 = 0;
 
             if(begin != end)
@@ -110,8 +127,7 @@
 
             if(!p)
             {
- p = new node;
- p->ch = c1;
+ p = new node(c1);
             }
 
             if(c1 < p->ch)
@@ -137,46 +153,97 @@
             return p;
         }
 
+ ///////////////////////////////////////////////////////////////////////////////
+ // conditional rotation : the goal is to minimize the overall
+ // weighted path length of each binary search tree
+ //
+ bool const cond_rotation(bool left, node* const i, node* const j) const
+ {
+ // don't rotate top node in binary search tree
+ if (i == j)
+ return false;
+ // calculate psi (the rotation condition)
+ node* const k = (left ? i->hi : i->lo);
+ long psi = 2*i->tau - j->tau - (k ? k->tau : 0);
+ if (psi <= 0)
+ return false;
+
+ // recalculate the tau values
+ j->tau += -i->tau + (k ? k->tau : 0);
+ i->tau += j->tau - (k ? k->tau : 0);
+ // fixup links and swap nodes
+ if (left)
+ {
+ j->lo = k;
+ i->hi = i;
+ }
+ else
+ {
+ j->hi = k;
+ i->lo = i;
+ }
+ (*i).swap(*j);
+ return true;
+ }
+
+ ///////////////////////////////////////////////////////////////////////////////
+ // search : find a string in the TST
+ //
         template<typename BidiIter, typename Trans>
- result_type search(BidiIter &begin, BidiIter end, Trans trans) const
+ result_type search(BidiIter &begin, BidiIter end, Trans trans, node* p) const
         {
- const node* p = this->root.get();
- result_type r = (0 == p->ch ? p->result : 0);
- if (begin == end)
- return r;
-
- BidiIter isave = begin;
- char_type c1 = trans(*begin);
+ result_type r = 0;
+ node* p2 = p;
+ bool left = false;
+ char_type c1 = (begin != end ? trans(*begin) : 0);
             while(p)
             {
+ ++p->tau;
                 if(c1 == p->ch)
                 {
- ++begin;
- p = p->eq.get();
+ // conditional rotation test
+ if (this->cond_rotation(left, p, p2))
+ p = p2;
                     if (0 == p->ch)
                     {
- isave = begin;
+ // it's a match!
                         r = p->result;
                     }
                     if(begin == end)
                         break;
- c1 = trans(*begin);
+ ++begin;
+ p = p->eq;
+ // search for the longest match first
+ r = search(begin,end,trans,p);
+ if (0 == r)
+ {
+ // search for a match ending here
+ r = search(end,end,trans,p);
+ if (0 == r)
+ {
+ --begin;
+ }
+ }
+ break;
                 }
                 else if(c1 < p->ch)
                 {
- p = p->lo.get();
+ left = true;
+ p2 = p;
+ p = p->lo;
                 }
                 else // (c1 > p->ch)
                 {
- p = p->hi.get();
+ left = false;
+ p2 = p;
+ p = p->hi;
                 }
             }
- begin = isave;
             return r;
         }
 
         template<typename Sink>
- void peek_(node_ptr const &p, Sink const &sink) const
+ void peek_(node const *const &p, Sink const &sink) const
         {
             if(p)
             {
@@ -186,7 +253,7 @@
             }
         }
 
- node_ptr root;
+ boost::shared_ptr<node> root;
     };
 
 }}} // namespace boost::xpressive::detail


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk