Boost logo

Boost-Commit :

From: hartmut.kaiser_at_[hidden]
Date: 2008-07-15 17:43:24


Author: hkaiser
Date: 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
New Revision: 47464
URL: http://svn.boost.org/trac/boost/changeset/47464

Log:
Spirit: Updating lexer
Added:
   branches/release/boost/spirit/home/support/detail/lexer/conversion/
   branches/release/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp (contents, props changed)
   branches/release/boost/spirit/home/support/detail/lexer/file_input.hpp (contents, props changed)
   branches/release/boost/spirit/home/support/detail/lexer/input.hpp (contents, props changed)
Removed:
   branches/release/boost/spirit/home/support/detail/lexer/char_state_machine.hpp
   branches/release/boost/spirit/home/support/detail/lexer/tokeniser.hpp
Text files modified:
   branches/release/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp | 9
   branches/release/boost/spirit/home/lex/lexer/lexertl/lexertl_functor.hpp | 2
   branches/release/boost/spirit/home/lex/lexer/lexertl/lexertl_generate_static.hpp | 2
   branches/release/boost/spirit/home/lex/lexer/lexertl/lexertl_lexer.hpp | 4
   branches/release/boost/spirit/home/support/detail/lexer/char_traits.hpp | 58 +++---
   branches/release/boost/spirit/home/support/detail/lexer/consts.hpp | 10
   branches/release/boost/spirit/home/support/detail/lexer/containers/ptr_list.hpp | 4
   branches/release/boost/spirit/home/support/detail/lexer/containers/ptr_vector.hpp | 23 ++
   branches/release/boost/spirit/home/support/detail/lexer/debug.hpp | 69 ++++---
   branches/release/boost/spirit/home/support/detail/lexer/generate_cpp.hpp | 11
   branches/release/boost/spirit/home/support/detail/lexer/generator.hpp | 109 ++++++------
   branches/release/boost/spirit/home/support/detail/lexer/parser/parser.hpp | 81 +++++---
   branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/num_token.hpp | 73 ++++----
   branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp | 57 +++++-
   branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp | 44 ++--
   branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_state.hpp | 4
   branches/release/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp | 4
   branches/release/boost/spirit/home/support/detail/lexer/parser/tree/iteration_node.hpp | 20 +
   branches/release/boost/spirit/home/support/detail/lexer/parser/tree/leaf_node.hpp | 28 ++
   branches/release/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp | 26 ++
   branches/release/boost/spirit/home/support/detail/lexer/parser/tree/selection_node.hpp | 6
   branches/release/boost/spirit/home/support/detail/lexer/parser/tree/sequence_node.hpp | 4
   branches/release/boost/spirit/home/support/detail/lexer/partition/charset.hpp | 4
   branches/release/boost/spirit/home/support/detail/lexer/partition/equivset.hpp | 30 ++
   branches/release/boost/spirit/home/support/detail/lexer/rules.hpp | 24 +-
   branches/release/boost/spirit/home/support/detail/lexer/runtime_error.hpp | 2
   branches/release/boost/spirit/home/support/detail/lexer/serialise.hpp | 6
   branches/release/boost/spirit/home/support/detail/lexer/size_t.hpp | 2
   branches/release/boost/spirit/home/support/detail/lexer/state_machine.hpp | 343 ++++++++++++++++++++++++++++++++++++---
   branches/release/boost/spirit/home/support/detail/lexer/string_token.hpp | 8
   30 files changed, 760 insertions(+), 307 deletions(-)

Modified: branches/release/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp
==============================================================================
--- branches/release/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp (original)
+++ branches/release/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -20,6 +20,9 @@
     {
     public:
         typedef std::vector<std::size_t> size_t_vector;
+ typedef
+ typename boost::detail::iterator_traits<Iterator>::value_type
+ char_type;
 
 // static std::size_t next (const std::size_t * const lookup_,
 // std::size_t const dfa_alphabet_, const std::size_t * const dfa_,
@@ -93,7 +96,8 @@
 // return id_;
 // }
 
- static std::size_t next (boost::lexer::state_machine const& state_machine_,
+ static std::size_t next (
+ boost::lexer::basic_state_machine<char_type> const& state_machine_,
             std::size_t &dfa_state_, Iterator const& start_,
             Iterator &start_token_, Iterator const& end_)
         {
@@ -186,7 +190,8 @@
 
         ///////////////////////////////////////////////////////////////////////
         static
- std::size_t next (boost::lexer::state_machine const& state_machine_,
+ std::size_t next (
+ boost::lexer::basic_state_machine<char_type> const& state_machine_,
             Iterator const& start_, Iterator &start_token_, Iterator const& end_)
         {
             if (start_token_ == end_) return 0;

Modified: branches/release/boost/spirit/home/lex/lexer/lexertl/lexertl_functor.hpp
==============================================================================
--- branches/release/boost/spirit/home/lex/lexer/lexertl/lexertl_functor.hpp (original)
+++ branches/release/boost/spirit/home/lex/lexer/lexertl/lexertl_functor.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -77,7 +77,7 @@
             std::size_t get_state() const { return 0; }
             void set_state_name (char_type const* state) {}
 
- boost::lexer::state_machine const& state_machine;
+ boost::lexer::basic_state_machine<char_type> const& state_machine;
             boost::lexer::basic_rules<char_type> const& rules;
             Iterator& first;
             Iterator last;

Modified: branches/release/boost/spirit/home/lex/lexer/lexertl/lexertl_generate_static.hpp
==============================================================================
--- branches/release/boost/spirit/home/lex/lexer/lexertl/lexertl_generate_static.hpp (original)
+++ branches/release/boost/spirit/home/lex/lexer/lexertl/lexertl_generate_static.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -67,7 +67,7 @@
     // Generate a tokenizer for the given state machine.
     template <typename Char>
     inline bool
- generate_cpp (boost::lexer::state_machine const& sm_,
+ generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_,
         boost::lexer::basic_rules<Char> const& rules_,
         std::ostream &os_, char const* name_suffix = "",
         bool skip_on_nomatch = true, bool optimize_parameters = true)

Modified: branches/release/boost/spirit/home/lex/lexer/lexertl/lexertl_lexer.hpp
==============================================================================
--- branches/release/boost/spirit/home/lex/lexer/lexertl/lexertl_lexer.hpp (original)
+++ branches/release/boost/spirit/home/lex/lexer/lexertl/lexertl_lexer.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -206,7 +206,7 @@
     private:
         // this type is purely used for the iterator_type construction below
         struct iterator_data_type {
- boost::lexer::state_machine const& state_machine_;
+ boost::lexer::basic_state_machine<char_type> const& state_machine_;
             boost::lexer::basic_rules<char_type> const& rules_;
             typename Functor::semantic_actions_type const& actions_;
         };
@@ -324,7 +324,7 @@
         }
 
     private:
- mutable boost::lexer::state_machine state_machine;
+ mutable boost::lexer::basic_state_machine<char_type> state_machine;
         boost::lexer::basic_rules<char_type> rules;
         typename Functor::semantic_actions_type actions;
         mutable bool initialized_dfa;

Deleted: branches/release/boost/spirit/home/support/detail/lexer/char_state_machine.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/char_state_machine.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
+++ (empty file)
@@ -1,62 +0,0 @@
-// char_state_machine.hpp
-// Copyright (c) 2007 Ben Hanson
-//
-// Distributed under the Boost Software License, Version 1.0. (See accompanying
-// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
-#ifndef BOOST_LEXER_CHAR_STATE_MACHINE_HPP
-#define BOOST_LEXER_CHAR_STATE_MACHINE_HPP
-
-#include "consts.hpp"
-#include <map>
-#include "size_t.hpp"
-#include "string_token.hpp"
-#include <vector>
-
-namespace boost
-{
-namespace lexer
-{
-template<typename CharT>
-struct basic_char_state_machine
-{
- struct state
- {
- typedef basic_string_token<CharT> string_token;
- typedef std::map<std::size_t, string_token> size_t_string_token_map;
- typedef std::pair<std::size_t, string_token> size_t_string_token_pair;
-
- bool _end_state;
- std::size_t _id;
- std::size_t _state;
- std::size_t _bol_index;
- std::size_t _eol_index;
- size_t_string_token_map _transitions;
-
- state () :
- _end_state (false),
- _id (0),
- _state (0),
- _bol_index (npos),
- _eol_index (npos)
- {
- }
- };
-
- typedef std::vector<state> state_vector;
- typedef std::vector<state_vector> state_vector_vector;
-
- state_vector_vector _sm_vector;
-
- void clear ()
- {
- _sm_vector.clear ();
- }
-};
-
-typedef basic_char_state_machine<char> char_state_machine;
-typedef basic_char_state_machine<wchar_t> wchar_state_machine;
-
-}
-}
-
-#endif

Modified: branches/release/boost/spirit/home/support/detail/lexer/char_traits.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/char_traits.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/char_traits.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // char_traits.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -13,39 +13,41 @@
 {
 namespace lexer
 {
- template<typename CharT>
- struct char_traits
- {
- typedef CharT index_type;
-
- static index_type call(CharT ch)
- {
- return ch;
- }
- };
+template<typename CharT>
+struct char_traits
+{
+ typedef CharT char_type;
+ typedef CharT index_type;
 
- template<>
- struct char_traits<char>
+ static index_type call (CharT ch)
     {
- typedef unsigned char index_type;
-
- static index_type call(char ch)
- {
- return static_cast<index_type>(ch);
- }
- };
+ return ch;
+ }
+};
 
- template<>
- struct char_traits<wchar_t>
+template<>
+struct char_traits<char>
+{
+ typedef char char_type;
+ typedef unsigned char index_type;
+
+ static index_type call (char ch)
     {
- typedef wchar_t index_type;
+ return static_cast<index_type>(ch);
+ }
+};
 
- static index_type call(wchar_t ch)
- {
- return ch;
- }
- };
+template<>
+struct char_traits<wchar_t>
+{
+ typedef wchar_t char_type;
+ typedef wchar_t index_type;
 
+ static index_type call (wchar_t ch)
+ {
+ return ch;
+ }
+};
 }
 }
 

Modified: branches/release/boost/spirit/home/support/detail/lexer/consts.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/consts.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/consts.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // consts.h
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -14,16 +14,16 @@
 {
 namespace lexer
 {
- // 0 = end state, 1 = id, 2 = lex state, 3 = bol, 4 = eol
+ // 0 = end state, 1 = id, 2 = lex state, 3 = bol, 4 = eol,
     // 5 = dead_state_index
     enum {end_state_index, id_index, state_index, bol_index, eol_index,
         dead_state_index, dfa_offset};
 
     const std::size_t max_macro_len = 20;
     const std::size_t num_chars = 256;
- const std::size_t num_wchar_ts =
- (boost::integer_traits<wchar_t>::const_max < 0x110000) ?
- boost::integer_traits<wchar_t>::const_max : 0x110000;
+ const std::size_t num_wchar_ts =
+ (boost::integer_traits<wchar_t>::const_max < 0x110000) ?
+ boost::integer_traits<wchar_t>::const_max : 0x110000;
     const std::size_t null_token = static_cast<std::size_t> (~0);
     const std::size_t bol_token = static_cast<std::size_t> (~1);
     const std::size_t eol_token = static_cast<std::size_t> (~2);

Modified: branches/release/boost/spirit/home/support/detail/lexer/containers/ptr_list.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/containers/ptr_list.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/containers/ptr_list.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // ptr_list.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -12,7 +12,7 @@
 {
 namespace lexer
 {
-namespace internal
+namespace detail
 {
 template<typename Type>
 class ptr_list

Modified: branches/release/boost/spirit/home/support/detail/lexer/containers/ptr_vector.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/containers/ptr_vector.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/containers/ptr_vector.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // ptr_vector.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -13,7 +13,7 @@
 {
 namespace lexer
 {
-namespace internal
+namespace detail
 {
 template<typename Type>
 class ptr_vector
@@ -60,6 +60,25 @@
         return _vector[index_];
     }
 
+ // Deep comparison: two ptr_vectors are equal when they hold the same
+ // number of pointers and each pair of pointed-to objects compares equal.
+ bool operator == (const ptr_vector &rhs_) const
+ {
+ if (_vector.size () != rhs_._vector.size ())
+ {
+ return false;
+ }
+
+ typename vector::const_iterator mine_ = _vector.begin ();
+ typename vector::const_iterator theirs_ = rhs_._vector.begin ();
+ const typename vector::const_iterator last_ = _vector.end ();
+
+ while (mine_ != last_)
+ {
+ // Compare the pointees, not the stored pointers.
+ if (!(**mine_ == **theirs_))
+ {
+ return false;
+ }
+
+ ++mine_;
+ ++theirs_;
+ }
+
+ return true;
+ }
+
     void clear ()
     {
         if (!_vector.empty ())

Added: branches/release/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp
==============================================================================
--- (empty file)
+++ branches/release/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,75 @@
+// char_state_machine.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_CHAR_STATE_MACHINE_HPP
+#define BOOST_LEXER_CHAR_STATE_MACHINE_HPP
+
+#include "../consts.hpp"
+#include <map>
+#include "../size_t.hpp"
+#include "../string_token.hpp"
+#include <vector>
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+template<typename CharT>
+struct basic_char_state_machine // Plain data holder: a vector of state vectors whose transitions are keyed to CharT string tokens.
+{
+ struct state // One entry of a state_vector; all fields are public.
+ {
+ typedef basic_string_token<CharT> string_token;
+ typedef std::map<std::size_t, string_token> size_t_string_token_map;
+ typedef std::pair<std::size_t, string_token> size_t_string_token_pair;
+
+ bool _end_state; // Defaults to false.
+ std::size_t _id; // Defaults to 0.
+ std::size_t _state; // Defaults to 0.
+ std::size_t _bol_index; // npos by default.
+ std::size_t _eol_index; // npos by default.
+ size_t_string_token_map _transitions; // std::size_t key -> string token; presumably key is the target state index - TODO confirm against the code that fills it.
+
+ state () : // Initialises every scalar field; _transitions starts empty.
+ _end_state (false),
+ _id (0),
+ _state (0),
+ _bol_index (npos),
+ _eol_index (npos)
+ {
+ }
+ };
+
+ typedef std::vector<state> state_vector;
+ typedef std::vector<state_vector> state_vector_vector;
+
+ state_vector_vector _sm_vector; // The only data member of the machine.
+
+ bool empty () const // True when _sm_vector holds no state vectors.
+ {
+ return _sm_vector.empty ();
+ }
+
+ void clear () // Drops every state vector.
+ {
+ _sm_vector.clear ();
+ }
+
+ void swap (basic_char_state_machine &csm_) // Exchanges contents with csm_ via std::vector::swap.
+ {
+ _sm_vector.swap (csm_._sm_vector);
+ }
+};
+
+typedef basic_char_state_machine<char> char_state_machine;
+typedef basic_char_state_machine<wchar_t> wchar_state_machine;
+
+}
+}
+}
+
+#endif

Modified: branches/release/boost/spirit/home/support/detail/lexer/debug.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/debug.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/debug.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // debug.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -112,55 +112,56 @@
         }
     }
 
- static void dump (const state_machine &state_machine_, ostream &stream_)
+ static void dump (const basic_state_machine<CharT> &state_machine_, ostream &stream_)
     {
- basic_char_state_machine<CharT> char_state_machine_;
+ typename basic_state_machine<CharT>::iterator iter_ =
+ state_machine_.begin ();
+ typename basic_state_machine<CharT>::iterator end_ =
+ state_machine_.end ();
 
- state_machine_.human_readable (char_state_machine_);
-
- for (std::size_t state_ = 0,
- states_ = char_state_machine_._sm_vector.size ();
- state_ < states_; ++state_)
+ for (std::size_t dfa_ = 0, dfas_ = state_machine_.size ();
+ dfa_ < dfas_; ++dfa_)
         {
- const typename basic_char_state_machine<CharT>::state *ptr_ =
- &char_state_machine_._sm_vector[state_].front ();
- const std::size_t size_ = char_state_machine_.
- _sm_vector[state_].size ();
+ const std::size_t states_ = iter_->states;
 
- for (std::size_t i_ = 0; i_ < size_; ++i_, ++ptr_)
+ for (std::size_t i_ = 0; i_ < states_; ++i_)
             {
                 state (stream_);
                 stream_ << i_ << std::endl;
 
- if (ptr_->_end_state)
+ if (iter_->end_state)
                 {
                     end_state (stream_);
- stream_ << ptr_->_id;
+ stream_ << iter_->id;
                     dfa (stream_);
- stream_ << ptr_->_state;
+ stream_ << iter_->goto_dfa;
                     stream_ << std::endl;
                 }
 
- if (ptr_->_bol_index != npos)
+ if (iter_->bol_index != npos)
                 {
                     bol (stream_);
- stream_ << ptr_->_bol_index << std::endl;
+ stream_ << iter_->bol_index << std::endl;
                 }
 
- if (ptr_->_eol_index != npos)
+ if (iter_->eol_index != npos)
                 {
                     eol (stream_);
- stream_ << ptr_->_eol_index << std::endl;
+ stream_ << iter_->eol_index << std::endl;
+ }
+
+ const std::size_t transitions_ = iter_->transitions;
+
+ if (transitions_ == 0)
+ {
+ ++iter_;
                 }
 
- for (typename basic_char_state_machine<CharT>::state::
- size_t_string_token_map::const_iterator iter_ = ptr_->
- _transitions.begin (), end_ = ptr_->_transitions.end ();
- iter_ != end_; ++iter_)
+ for (std::size_t t_ = 0; t_ < transitions_; ++t_)
                 {
- std::size_t transition_ = iter_->first;
+ std::size_t goto_state_ = iter_->goto_state;
 
- if (iter_->second.any ())
+ if (iter_->token.any ())
                     {
                         any (stream_);
                     }
@@ -168,20 +169,30 @@
                     {
                         open_bracket (stream_);
 
- if (iter_->second._negated)
+ if (iter_->token._negated)
                         {
                             negated (stream_);
                         }
 
                         string charset_;
+ CharT c_ = 0;
 
- escape_control_chars (iter_->second._charset,
+ escape_control_chars (iter_->token._charset,
                             charset_);
+ c_ = *charset_.c_str ();
+
+ if (!iter_->token._negated &&
+ (c_ == '^' || c_ == ']'))
+ {
+ stream_ << '\\';
+ }
+
                         stream_ << charset_;
                         close_bracket (stream_);
                     }
 
- stream_ << transition_ << std::endl;
+ stream_ << goto_state_ << std::endl;
+ ++iter_;
                 }
 
                 stream_ << std::endl;

Added: branches/release/boost/spirit/home/support/detail/lexer/file_input.hpp
==============================================================================
--- (empty file)
+++ branches/release/boost/spirit/home/support/detail/lexer/file_input.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,456 @@
+// file_input.hpp
+// Copyright (c) 2008 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_FILE_INPUT
+#define BOOST_LEXER_FILE_INPUT
+
+#include "char_traits.hpp"
+// memmove()
+#include <cstring>
+#include <fstream>
+#include "size_t.hpp"
+#include "state_machine.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+template<typename CharT, typename Traits = char_traits<CharT> >
+class basic_file_input
+{
+public:
+ class iterator
+ {
+ public:
+#if defined _MSC_VER && _MSC_VER <= 1200
+ friend basic_file_input;
+#else
+ friend class basic_file_input;
+#endif
+
+ // Value snapshot of the current token, handed out by operator * and ->.
+ // id: value returned by basic_file_input::next ().
+ // start, end: token bounds written by next () (nulled once id is 0).
+ // state: start state passed by reference to the multi-DFA next ().
+ struct data
+ {
+ std::size_t id;
+ const CharT *start;
+ const CharT *end;
+ std::size_t state;
+
+ // Construct in end() state. start and end are nulled too, so that a
+ // default-constructed value never feeds indeterminate pointers to
+ // operator == () and compares equal to what end() produces.
+ data () :
+ id (0),
+ start (0),
+ end (0),
+ state (npos)
+ {
+ }
+
+ // Member-wise comparison of all four fields.
+ bool operator == (const data &rhs_) const
+ {
+ return id == rhs_.id && start == rhs_.start &&
+ end == rhs_.end && state == rhs_.state;
+ }
+ };
+
+ iterator () :
+ _input (0)
+ {
+ }
+
+ bool operator == (const iterator &rhs_) const
+ {
+ return _data == rhs_._data;
+ }
+
+ bool operator != (const iterator &rhs_) const
+ {
+ return !(*this == rhs_);
+ }
+
+ data &operator * ()
+ {
+ return _data;
+ }
+
+ data *operator -> ()
+ {
+ return &_data;
+ }
+
+ // Let compiler generate operator = ().
+
+ // prefix version
+ iterator &operator ++ ()
+ {
+ next_token ();
+ return *this;
+ }
+
+ // postfix version
+ iterator operator ++ (int)
+ {
+ iterator iter_ = *this;
+
+ next_token ();
+ return iter_;
+ }
+
+ void next_token ()
+ {
+ _data.start = _data.end;
+
+ if (_input->_state_machine->_dfa->size () == 1)
+ {
+ _data.id = _input->next (&_input->_state_machine->_lookup->
+ front ()->front (), _input->_state_machine->_dfa_alphabet.
+ front (), &_input->_state_machine->_dfa->front ()->
+ front (), _data.start, _data.end);
+ }
+ else
+ {
+ _data.id = _input->next (*_input->_state_machine, _data.state,
+ _data.start, _data.end);
+ }
+
+ if (_data.id == 0)
+ {
+ _data.start = 0;
+ _data.end = 0;
+ // Ensure current state matches that returned by end().
+ _data.state = npos;
+ }
+ }
+
+ private:
+ // Not owner (obviously!)
+ basic_file_input *_input;
+ data _data;
+ };
+
+#if defined _MSC_VER && _MSC_VER <= 1200
+ friend iterator;
+#else
+ friend class iterator;
+#endif
+
+ // Make it explicit that we are NOT taking a copy of state_machine_!
+ basic_file_input (const basic_state_machine<CharT> *state_machine_,
+ std::basic_ifstream<CharT> *is_,
+ const std::streamsize buffer_size_ = 4096,
+ const std::streamsize buffer_increment_ = 1024) :
+ _state_machine (state_machine_),
+ _stream (is_),
+ _buffer_size (buffer_size_),
+ _buffer_increment (buffer_increment_),
+ _buffer (_buffer_size, '!')
+ {
+ _start_buffer = &_buffer.front ();
+ _end_buffer = _start_buffer + _buffer.size ();
+ _start_token = _end_buffer;
+ _end_token = _end_buffer;
+ }
+
+ iterator begin ()
+ {
+ iterator iter_;
+
+ iter_._input = this;
+ iter_._data.id = npos;
+ iter_._data.start = 0;
+ iter_._data.end = 0;
+ iter_._data.state = 0;
+ ++iter_;
+ return iter_;
+ }
+
+ iterator end ()
+ {
+ iterator iter_;
+
+ iter_._input = this;
+ iter_._data.start = 0;
+ iter_._data.end = 0;
+ return iter_;
+ }
+
+ void flush ()
+ {
+ // This temporary is mandatory, otherwise the
+ // pointer calculations won't work!
+ const CharT *temp_ = _end_buffer;
+
+ _start_token = _end_token = _end_buffer;
+ reload_buffer (temp_, true, _end_token);
+ }
+
+private:
+ typedef std::basic_istream<CharT> istream;
+ typedef std::vector<CharT> buffer;
+
+ const basic_state_machine<CharT> *_state_machine;
+ const std::streamsize _buffer_size;
+ const std::streamsize _buffer_increment;
+
+ buffer _buffer;
+ CharT *_start_buffer;
+ istream *_stream;
+ const CharT *_start_token;
+ const CharT *_end_token;
+ CharT *_end_buffer;
+
+ std::size_t next (const basic_state_machine<CharT> &state_machine_, // Multi-DFA scan: tables are selected by start_state_; returns the token id.
+ std::size_t &start_state_, const CharT * &start_, const CharT * &end_) // start_/end_ receive the token bounds; start_state_ is overwritten at every accepting state.
+ {
+ _start_token = _end_token; // Resume where the previous token ended.
+
+again: // Re-entered when an accepted token has id 0. NOTE(review): _start_token is not advanced before the jump - confirm this is intended.
+ const std::size_t * lookup_ = &state_machine_._lookup[start_state_]->
+ front ();
+ std::size_t dfa_alphabet_ = state_machine_._dfa_alphabet[start_state_];
+ const std::size_t *dfa_ = &state_machine_._dfa[start_state_]->front ();
+ const std::size_t *ptr_ = dfa_ + dfa_alphabet_; // Scanning starts at row 1 of the table (rows are dfa_alphabet_ entries wide).
+ const CharT *curr_ = _start_token;
+ bool end_state_ = *ptr_ != 0; // Non-zero first entry of a row marks an accepting state.
+ std::size_t id_ = *(ptr_ + id_index);
+ const CharT *end_token_ = curr_;
+
+ for (;;)
+ {
+ if (curr_ >= _end_buffer) // Buffered data exhausted: try to read more.
+ {
+ if (!reload_buffer (curr_, end_state_, end_token_))
+ {
+ // EOF
+ break;
+ }
+ }
+
+ const std::size_t BOL_state_ = ptr_[bol_index];
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ if (BOL_state_ && (_start_token == _start_buffer ||
+ *(_start_token - 1) == '\n')) // Token starts the buffer or follows a newline.
+ {
+ ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
+ }
+ else if (EOL_state_ && *curr_ == '\n') // Newline is inspected but not consumed here.
+ {
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+ }
+ else
+ {
+ const std::size_t state_ =
+ ptr_[lookup_[static_cast<typename Traits::index_type> (*curr_++)]]; // Consumes one character.
+
+ if (state_ == 0) // No transition for this character: stop scanning.
+ {
+ break;
+ }
+
+ ptr_ = &dfa_[state_ * dfa_alphabet_];
+ }
+
+ if (*ptr_) // Accepting state: remember the longest match so far.
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ start_state_ = *(ptr_ + state_index);
+ end_token_ = curr_;
+ }
+ }
+
+ if (_start_token >= _end_buffer)
+ {
+ // No more tokens...
+ return 0;
+ }
+
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ if (EOL_state_ && curr_ == end_) // NOTE(review): end_ is the caller's output reference, not an end-of-input marker - confirm this comparison.
+ {
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+
+ if (*ptr_)
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ start_state_ = *(ptr_ + state_index);
+ end_token_ = curr_;
+ }
+ }
+
+ if (end_state_)
+ {
+ // return longest match
+ _end_token = end_token_;
+
+ if (id_ == 0) goto again; // id 0 is never returned for a match; rescan using the updated start_state_.
+ }
+ else
+ {
+ // No match causes char to be skipped
+ _end_token = _start_token + 1;
+ id_ = npos;
+ }
+
+ start_ = _start_token;
+ end_ = _end_token;
+ return id_;
+ }
+
+ // Single-DFA scan. Walks the table dfa_ (rows of dfa_alphabet_ entries,
+ // characters mapped to columns through lookup_) from the end of the
+ // previous token and reports the longest match.
+ // start_/end_ receive the bounds of the token that was found.
+ // Returns the id of the matched token, npos when no rule matched (one
+ // character is then skipped), or 0 when the buffered input is exhausted.
+ std::size_t next (const std::size_t * const lookup_,
+ const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
+ const CharT * &start_, const CharT * &end_)
+ {
+ // Resume where the previous token ended.
+ _start_token = _end_token;
+
+ // Scanning starts at row 1 of the table.
+ const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
+ const CharT *curr_ = _start_token;
+ bool end_state_ = *ptr_ != 0;
+ // Plain initialisation (the variable used to be assigned inside its
+ // own initialiser).
+ std::size_t id_ = *(ptr_ + id_index);
+ const CharT *end_token_ = curr_;
+
+ for (;;)
+ {
+ if (curr_ >= _end_buffer)
+ {
+ if (!reload_buffer (curr_, end_state_, end_token_))
+ {
+ // EOF
+ break;
+ }
+ }
+
+ const std::size_t BOL_state_ = ptr_[bol_index];
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ if (BOL_state_ && (_start_token == _start_buffer ||
+ *(_start_token - 1) == '\n'))
+ {
+ ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
+ }
+ else if (EOL_state_ && *curr_ == '\n')
+ {
+ // The newline is inspected but not consumed here.
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+ }
+ else
+ {
+ const std::size_t state_ =
+ ptr_[lookup_[static_cast<typename Traits::index_type> (*curr_++)]];
+
+ // No transition for this character: stop scanning.
+ if (state_ == 0)
+ {
+ break;
+ }
+
+ ptr_ = &dfa_[state_ * dfa_alphabet_];
+ }
+
+ // Accepting state: remember the longest match so far.
+ if (*ptr_)
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ end_token_ = curr_;
+ }
+ }
+
+ if (_start_token >= _end_buffer)
+ {
+ // No more tokens...
+ return 0;
+ }
+
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ // NOTE(review): end_ is the caller's output reference, not an
+ // end-of-input marker - confirm this comparison is what is intended.
+ if (EOL_state_ && curr_ == end_)
+ {
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+
+ if (*ptr_)
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ end_token_ = curr_;
+ }
+ }
+
+ if (end_state_)
+ {
+ // return longest match
+ _end_token = end_token_;
+ }
+ else
+ {
+ // No match causes char to be skipped
+ _end_token = _start_token + 1;
+ id_ = npos;
+ }
+
+ start_ = _start_token;
+ end_ = _end_token;
+ return id_;
+ }
+
+ // Refills the buffer from the stream, keeping the unfinished token (and
+ // the character before it) available, then rebases curr_ - and end_token_
+ // when end_state_ is set - onto the relocated token.
+ // Returns false, leaving everything untouched, once the stream reports
+ // eof (); true otherwise.
+ bool reload_buffer (const CharT * &curr_, const bool end_state_,
+ const CharT * &end_token_)
+ {
+ bool success_ = !_stream->eof ();
+
+ if (success_)
+ {
+ const CharT *old_start_token_ = _start_token;
+ std::size_t old_size_ = _buffer.size ();
+ std::size_t count_ = 0;
+
+ if (_start_token - 1 == _start_buffer)
+ {
+ // Run out of buffer space, so increase.
+ _buffer.resize (old_size_ + _buffer_increment, '!');
+ _start_buffer = &_buffer.front ();
+ _start_token = _start_buffer + 1;
+ _stream->read (_start_buffer + old_size_,
+ _buffer_increment);
+ count_ = _stream->gcount ();
+ _end_buffer = _start_buffer + old_size_ + count_;
+ }
+ else if (_start_token < _end_buffer)
+ {
+ const std::size_t len_ = _end_buffer - _start_token;
+
+ // Slide the unread tail, plus the one character before it, to
+ // the front. Source and destination lie in the same buffer and
+ // may overlap, so memmove () is required rather than memcpy ().
+ // NOTE(review): if _start_token == _start_buffer this reads one
+ // element before the buffer - confirm callers cannot reach that.
+ std::memmove (_start_buffer, _start_token - 1, (len_ + 1) * sizeof (CharT));
+ _stream->read (_start_buffer + len_ + 1,
+ static_cast<std::streamsize> (_buffer.size () - len_ - 1));
+ count_ = _stream->gcount ();
+ _start_token = _start_buffer + 1;
+ _end_buffer = _start_buffer + len_ + 1 + count_;
+ }
+ else
+ {
+ // Nothing pending: overwrite the whole buffer.
+ _stream->read (_start_buffer, static_cast<std::streamsize>
+ (_buffer.size ()));
+ count_ = _stream->gcount ();
+ _start_token = _start_buffer;
+ _end_buffer = _start_buffer + count_;
+ }
+
+ // Re-express the caller's cursors relative to the moved token.
+ if (end_state_)
+ {
+ end_token_ = _start_token +
+ (end_token_ - old_start_token_);
+ }
+
+ curr_ = _start_token + (curr_ - old_start_token_);
+ }
+
+ return success_;
+ }
+
+ // Disallow copying of buffer
+ basic_file_input (const basic_file_input &);
+ const basic_file_input &operator = (const basic_file_input &);
+};
+
+typedef basic_file_input<char> file_input;
+typedef basic_file_input<wchar_t> wfile_input;
+}
+}
+
+#endif

Modified: branches/release/boost/spirit/home/support/detail/lexer/generate_cpp.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/generate_cpp.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/generate_cpp.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,10 +1,10 @@
-// cpp_code.hpp
-// Copyright (c) 2008 Ben Hanson
+// generate_cpp_code.hpp
+// Copyright (c) 2008 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
-#ifndef BOOST_LEXER_EXAMPLE_CPP_CODE_HPP
-#define BOOST_LEXER_EXAMPLE_CPP_CODE_HPP
+#ifndef BOOST_LEXER_GENERATE_CPP_CODE_HPP
+#define BOOST_LEXER_GENERATE_CPP_CODE_HPP
 
 #include "char_traits.hpp"
 #include "consts.hpp"
@@ -19,7 +19,8 @@
 {
 namespace lexer
 {
-void generate_cpp (const state_machine &sm_, std::ostream &os_,
+template<typename CharT>
+void generate_cpp (const basic_state_machine<CharT> &sm_, std::ostream &os_,
     const bool use_pointers_ = false, const bool skip_unknown_ = true,
     const bool optimise_parameters_ = true, const char *name_ = "next_token")
 {

Modified: branches/release/boost/spirit/home/support/detail/lexer/generator.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/generator.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/generator.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,13 +1,14 @@
 // generator.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 #ifndef BOOST_LEXER_GENERATOR_HPP
 #define BOOST_LEXER_GENERATOR_HPP
 
-#include <cstring> // memcmp
 #include "char_traits.hpp"
+// memcmp()
+#include <cstring>
 #include "partition/charset.hpp"
 #include "partition/equivset.hpp"
 #include <memory>
@@ -25,10 +26,10 @@
 class basic_generator
 {
 public:
- typedef state_machine::size_t_vector size_t_vector;
+ typedef typename basic_state_machine<CharT>::size_t_vector size_t_vector;
     typedef basic_rules<CharT> rules;
 
- static void build (const rules &rules_, state_machine &state_machine_)
+ static void build (const rules &rules_, basic_state_machine<CharT> &state_machine_)
     {
         std::size_t index_ = 0;
         std::size_t size_ = rules_.statemap ().size ();
@@ -56,7 +57,7 @@
                 // vector mapping token indexes to partitioned token index sets
                 index_set_vector set_mapping_;
                 // syntax tree
- internal::node *root_ = build_tree (rules_, index_,
+ detail::node *root_ = build_tree (rules_, index_,
                     node_ptr_vector_, state_machine_._lookup[index_],
                     set_mapping_, state_machine_._dfa_alphabet[index_],
                     state_machine_._seen_BOL_assertion,
@@ -69,7 +70,7 @@
         }
     }
 
- static void minimise (state_machine &state_machine_)
+ static void minimise (basic_state_machine<CharT> &state_machine_)
     {
         const std::size_t machines_ = state_machine_._dfa->size ();
 
@@ -92,30 +93,30 @@
     }
 
 protected:
- typedef internal::basic_charset<CharT> charset;
- typedef internal::ptr_list<charset> charset_list;
+ typedef detail::basic_charset<CharT> charset;
+ typedef detail::ptr_list<charset> charset_list;
     typedef std::auto_ptr<charset> charset_ptr;
- typedef internal::equivset equivset;
- typedef internal::ptr_list<equivset> equivset_list;
+ typedef detail::equivset equivset;
+ typedef detail::ptr_list<equivset> equivset_list;
     typedef std::auto_ptr<equivset> equivset_ptr;
     typedef typename charset::index_set index_set;
     typedef std::vector<index_set> index_set_vector;
- typedef internal::basic_parser<CharT> parser;
+ typedef detail::basic_parser<CharT> parser;
     typedef typename parser::node_ptr_vector node_ptr_vector;
- typedef std::set<const internal::node *> node_set;
- typedef internal::ptr_vector<node_set> node_set_vector;
- typedef std::vector<const internal::node *> node_vector;
- typedef internal::ptr_vector<node_vector> node_vector_vector;
+ typedef std::set<const detail::node *> node_set;
+ typedef detail::ptr_vector<node_set> node_set_vector;
+ typedef std::vector<const detail::node *> node_vector;
+ typedef detail::ptr_vector<node_vector> node_vector_vector;
     typedef typename parser::string string;
     typedef std::pair<string, string> string_pair;
     typedef typename parser::tokeniser::string_token string_token;
     typedef std::deque<string_pair> macro_deque;
- typedef std::pair<string, const internal::node *> macro_pair;
+ typedef std::pair<string, const detail::node *> macro_pair;
     typedef typename parser::macro_map::iterator macro_iter;
     typedef std::pair<macro_iter, bool> macro_iter_pair;
     typedef typename parser::tokeniser::token_map token_map;
 
- static internal::node *build_tree (const rules &rules_,
+ static detail::node *build_tree (const rules &rules_,
         const std::size_t state_, node_ptr_vector &node_ptr_vector_,
         size_t_vector *lookup_, index_set_vector &set_mapping_,
         std::size_t &dfa_alphabet_, bool &seen_BOL_assertion_,
@@ -139,14 +140,14 @@
         const typename rules::string_pair_deque &macrodeque_ =
             rules_.macrodeque ();
         typename parser::macro_map macromap_;
- typename internal::node::node_vector tree_vector_;
+ typename detail::node::node_vector tree_vector_;
 
         build_macros (token_map_, macrodeque_, macromap_,
             rules_.case_sensitive (), rules_.locale (), node_ptr_vector_,
             rules_.dot_not_newline (), seen_BOL_assertion_,
             seen_EOL_assertion_);
 
- internal::node *root_ = parser::parse (regex_.c_str (),
+ detail::node *root_ = parser::parse (regex_.c_str (),
             regex_.c_str () + regex_.size (), *ids_iter_, *states_iter_,
             rules_.case_sensitive (), rules_.dot_not_newline (),
             rules_.locale (), node_ptr_vector_, macromap_, token_map_,
@@ -178,9 +179,9 @@
         if (seen_BOL_assertion_)
         {
             // Fixup BOLs
- typename internal::node::node_vector::iterator iter_ =
+ typename detail::node::node_vector::iterator iter_ =
                 tree_vector_.begin ();
- typename internal::node::node_vector::iterator end_ =
+ typename detail::node::node_vector::iterator end_ =
                 tree_vector_.end ();
 
             for (; iter_ != end_; ++iter_)
@@ -191,9 +192,9 @@
 
         // join trees
         {
- typename internal::node::node_vector::iterator iter_ =
+ typename detail::node::node_vector::iterator iter_ =
                 tree_vector_.begin ();
- typename internal::node::node_vector::iterator end_ =
+ typename detail::node::node_vector::iterator end_ =
                 tree_vector_.end ();
 
             if (iter_ != end_)
@@ -205,7 +206,7 @@
             for (; iter_ != end_; ++iter_)
             {
                 node_ptr_vector_->push_back (0);
- node_ptr_vector_->back () = new internal::selection_node
+ node_ptr_vector_->back () = new detail::selection_node
                     (root_, *iter_);
                 root_ = node_ptr_vector_->back ();
             }
@@ -256,7 +257,7 @@
         {
             const typename rules::string &name_ = iter_->first;
             const typename rules::string &regex_ = iter_->second;
- internal::node *node_ = parser::parse (regex_.c_str (),
+ detail::node *node_ = parser::parse (regex_.c_str (),
                 regex_.c_str () + regex_.size (), 0, 0, case_sensitive_,
                 not_dot_newline_, locale_, node_ptr_vector_, macromap_,
                 token_map_, seen_BOL_assertion_, seen_EOL_assertion_);
@@ -267,11 +268,11 @@
         }
     }
 
- static void build_dfa (internal::node *root_,
+ static void build_dfa (detail::node *root_,
         const index_set_vector &set_mapping_, const std::size_t dfa_alphabet_,
         size_t_vector &dfa_)
     {
- typename internal::node::node_vector *followpos_ =
+ typename detail::node::node_vector *followpos_ =
             &root_->firstpos ();
         node_set_vector seen_sets_;
         node_vector_vector seen_vectors_;
@@ -302,7 +303,10 @@
                 {
                     ptr_ = &dfa_.front () + ((index_ + 1) * dfa_alphabet_);
 
- for (typename internal::equivset::index_vector::const_iterator
+ // Prune abstemious (non-greedy) transitions from end states.
+ if (*ptr_ && !equivset_->_greedy) continue;
+
+ for (typename detail::equivset::index_vector::const_iterator
                         equiv_iter_ = equivset_->_index_vector.begin (),
                         equiv_end_ = equivset_->_index_vector.end ();
                         equiv_iter_ != equiv_end_; ++equiv_iter_)
@@ -333,7 +337,7 @@
         }
     }
 
- static std::size_t closure (typename internal::node::node_vector *followpos_,
+ static std::size_t closure (typename detail::node::node_vector *followpos_,
         node_set_vector &seen_sets_, node_vector_vector &seen_vectors_,
         size_t_vector &hash_vector_, const std::size_t size_, size_t_vector &dfa_)
     {
@@ -348,12 +352,12 @@
         std::auto_ptr<node_set> set_ptr_ (new node_set);
         std::auto_ptr<node_vector> vector_ptr_ (new node_vector);
 
- for (typename internal::node::node_vector::const_iterator iter_ =
+ for (typename detail::node::node_vector::const_iterator iter_ =
             followpos_->begin (), end_ = followpos_->end ();
             iter_ != end_; ++iter_)
         {
- closure_ex (*iter_, end_state_, id_, state_, followpos_,
- set_ptr_.get (), vector_ptr_.get (), hash_);
+ closure_ex (*iter_, end_state_, id_, state_, set_ptr_.get (),
+ vector_ptr_.get (), hash_);
         }
 
         bool found_ = false;
@@ -399,9 +403,8 @@
         return index_;
     }
 
- static void closure_ex (internal::node *node_, bool &end_state_,
- std::size_t &id_, std::size_t &state_,
- typename internal::node::node_vector * /*followpos_*/, node_set *set_ptr_,
+ static void closure_ex (detail::node *node_, bool &end_state_,
+ std::size_t &id_, std::size_t &state_, node_set *set_ptr_,
         node_vector *vector_ptr_, std::size_t &hash_)
     {
         const bool temp_end_state_ = node_->end_state ();
@@ -529,7 +532,7 @@
 
         if (token_._negated)
         {
- CharT curr_char_ = sizeof(CharT) == 1 ? -128 : 0;
+ CharT curr_char_ = sizeof (CharT) == 1 ? -128 : 0;
             std::size_t i_ = 0;
 
             while (curr_ < chars_end_)
@@ -655,7 +658,7 @@
 
         for (; iter_ != end_; ++iter_)
         {
- const internal::node *node_ = *iter_;
+ const detail::node *node_ = *iter_;
 
             if (!node_->end_state ())
             {
@@ -671,31 +674,31 @@
 
                         index_set_.insert (token_);
                         list_->back () = new equivset (index_set_,
- node_->followpos ());
+ node_->greedy (), node_->token (), node_->followpos ());
                     }
                     else
                     {
                         list_->back () = new equivset (set_mapping_[token_],
- node_->followpos ());
+ node_->greedy (), node_->token (), node_->followpos ());
                     }
                 }
             }
         }
     }
 
- static void fixup_bol (internal::node * &root_,
+ static void fixup_bol (detail::node * &root_,
         node_ptr_vector &node_ptr_vector_)
     {
- typename internal::node::node_vector *first_ = &root_->firstpos ();
+ typename detail::node::node_vector *first_ = &root_->firstpos ();
         bool found_ = false;
- typename internal::node::node_vector::const_iterator iter_ =
+ typename detail::node::node_vector::const_iterator iter_ =
             first_->begin ();
- typename internal::node::node_vector::const_iterator end_ =
+ typename detail::node::node_vector::const_iterator end_ =
             first_->end ();
 
         for (; iter_ != end_; ++iter_)
         {
- const internal::node *node_ = *iter_;
+ const detail::node *node_ = *iter_;
 
             found_ = !node_->end_state () && node_->token () == bol_token;
 
@@ -705,23 +708,23 @@
         if (!found_)
         {
             node_ptr_vector_->push_back (0);
- node_ptr_vector_->back () = new internal::leaf_node (bol_token);
+ node_ptr_vector_->back () = new detail::leaf_node (bol_token, true);
 
- internal::node *lhs_ = node_ptr_vector_->back ();
+ detail::node *lhs_ = node_ptr_vector_->back ();
 
             node_ptr_vector_->push_back (0);
- node_ptr_vector_->back () = new internal::leaf_node (null_token);
+ node_ptr_vector_->back () = new detail::leaf_node (null_token, true);
 
- internal::node *rhs_ = node_ptr_vector_->back ();
+ detail::node *rhs_ = node_ptr_vector_->back ();
 
             node_ptr_vector_->push_back (0);
             node_ptr_vector_->back () =
- new internal::selection_node (lhs_, rhs_);
+ new detail::selection_node (lhs_, rhs_);
             lhs_ = node_ptr_vector_->back ();
 
             node_ptr_vector_->push_back (0);
             node_ptr_vector_->back () =
- new internal::sequence_node (lhs_, root_);
+ new detail::sequence_node (lhs_, root_);
             root_ = node_ptr_vector_->back ();
         }
     }
@@ -755,8 +758,10 @@
                     continue;
                 }
 
- using namespace std; // some systems have memcmp in namespace std
- if (memcmp (first_, second_, sizeof(std::size_t) *
+ // Some systems have memcmp in namespace std.
+ using namespace std;
+
+ if (memcmp (first_, second_, sizeof (std::size_t) *
                     dfa_alphabet_) == 0)
                 {
                     index_set_.insert (curr_index_);

Added: branches/release/boost/spirit/home/support/detail/lexer/input.hpp
==============================================================================
--- (empty file)
+++ branches/release/boost/spirit/home/support/detail/lexer/input.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,486 @@
+// input.hpp
+// Copyright (c) 2008 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_INPUT
+#define BOOST_LEXER_INPUT
+
+#include "char_traits.hpp"
+#include <iterator>
+#include "size_t.hpp"
+#include "state_machine.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+template<typename FwdIter, typename Traits =
+ char_traits<typename boost::detail::iterator_traits<FwdIter>::value_type> >
+class basic_input
+{
+public:
+ class iterator
+ {
+ public:
+#if defined _MSC_VER && _MSC_VER <= 1200
+ friend basic_input;
+#else
+ friend class basic_input;
+#endif
+
+ struct data
+ {
+ std::size_t id;
+ FwdIter start;
+ FwdIter end;
+ bool bol;
+ std::size_t state;
+
+ // Construct in end() state.
+ data () :
+ id (0),
+ bol (false),
+ state (npos)
+ {
+ }
+
+ bool operator == (const data &rhs_) const
+ {
+ return id == rhs_.id && start == rhs_.start &&
+ end == rhs_.end && bol == rhs_.bol && state == rhs_.state;
+ }
+ };
+
+ iterator () :
+ _input (0)
+ {
+ }
+
+ bool operator == (const iterator &rhs_) const
+ {
+ return _data == rhs_._data;
+ }
+
+ bool operator != (const iterator &rhs_) const
+ {
+ return !(*this == rhs_);
+ }
+
+ data &operator * ()
+ {
+ return _data;
+ }
+
+ data *operator -> ()
+ {
+ return &_data;
+ }
+
+ // Let compiler generate operator = ().
+
+ // prefix version
+ iterator &operator ++ ()
+ {
+ next_token ();
+ return *this;
+ }
+
+ // postfix version
+ iterator operator ++ (int)
+ {
+ iterator iter_ = *this;
+
+ next_token ();
+ return iter_;
+ }
+
+ private:
+ // Not owner (obviously!)
+ const basic_input *_input;
+ data _data;
+
+ void next_token ()
+ {
+ _data.start = _data.end;
+
+ if (_input->_state_machine->_dfa->size () == 1)
+ {
+ if (_input->_state_machine->_seen_BOL_assertion ||
+ _input->_state_machine->_seen_EOL_assertion)
+ {
+ _data.id = next
+ (&_input->_state_machine->_lookup->front ()->front (),
+ _input->_state_machine->_dfa_alphabet.front (),
+ &_input->_state_machine->_dfa->front ()->front (),
+ _input->_begin, _data.end, _input->_end);
+ }
+ else
+ {
+ _data.id = next (&_input->_state_machine->_lookup->
+ front ()->front (), _input->_state_machine->
+ _dfa_alphabet.front (), &_input->_state_machine->
+ _dfa->front ()->front (), _data.end, _input->_end);
+ }
+ }
+ else
+ {
+ if (_input->_state_machine->_seen_BOL_assertion ||
+ _input->_state_machine->_seen_EOL_assertion)
+ {
+ _data.id = next (*_input->_state_machine, _data.state,
+ _input->_begin, _data.end, _input->_end);
+ }
+ else
+ {
+ _data.id = next (*_input->_state_machine, _data.state,
+ _data.end, _input->_end);
+ }
+ }
+
+ if (_data.end == _input->_end && _data.start == _data.end)
+ {
+ // Ensure current state matches that returned by end().
+ _data.state = npos;
+ }
+ }
+
+ std::size_t next (const basic_state_machine
+ <typename Traits::char_type> &state_machine_,
+ std::size_t &start_state_, const FwdIter &start_,
+ FwdIter &start_token_, const FwdIter &end_)
+ {
+ if (start_token_ == end_) return 0;
+
+ again:
+ bool bol_ = _data.bol;
+ const std::size_t * lookup_ = &state_machine_._lookup[start_state_]->
+ front ();
+ std::size_t dfa_alphabet_ = state_machine_._dfa_alphabet[start_state_];
+ const std::size_t *dfa_ = &state_machine_._dfa[start_state_]->front ();
+ const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
+ FwdIter curr_ = start_token_;
+ bool end_state_ = *ptr_ != 0;
+ std::size_t id_ = *(ptr_ + id_index);
+ bool end_bol_ = bol_;
+ FwdIter end_token_ = start_token_;
+
+ while (curr_ != end_)
+ {
+ const std::size_t BOL_state_ = ptr_[bol_index];
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ if (BOL_state_ && bol_)
+ {
+ ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
+ }
+ else if (EOL_state_ && *curr_ == '\n')
+ {
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+ }
+ else
+ {
+ typename Traits::char_type prev_char_ = *curr_++;
+
+ bol_ = prev_char_ == '\n';
+
+ const std::size_t state_ =
+ ptr_[lookup_[static_cast<typename Traits::index_type>
+ (prev_char_)]];
+
+ if (state_ == 0)
+ {
+ break;
+ }
+
+ ptr_ = &dfa_[state_ * dfa_alphabet_];
+ }
+
+ if (*ptr_)
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ start_state_ = *(ptr_ + state_index);
+ end_bol_ = bol_;
+ end_token_ = curr_;
+ }
+ }
+
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ if (EOL_state_ && curr_ == end_)
+ {
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+
+ if (*ptr_)
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ start_state_ = *(ptr_ + state_index);
+ end_bol_ = bol_;
+ end_token_ = curr_;
+ }
+ }
+
+ if (end_state_)
+ {
+ // return longest match
+ _data.bol = end_bol_;
+ start_token_ = end_token_;
+
+ if (id_ == 0) goto again;
+ }
+ else
+ {
+ // No match causes char to be skipped
+ _data.bol = *start_token_ == '\n';
+ ++start_token_;
+ id_ = npos;
+ }
+
+ return id_;
+ }
+
+ std::size_t next (const basic_state_machine
+ <typename Traits::char_type> &state_machine_,
+ std::size_t &start_state_, FwdIter &start_token_,
+ FwdIter const &end_)
+ {
+ if (start_token_ == end_) return 0;
+
+ again:
+ const std::size_t * lookup_ = &state_machine_._lookup[start_state_]->
+ front ();
+ std::size_t dfa_alphabet_ = state_machine_._dfa_alphabet[start_state_];
+ const std::size_t *dfa_ = &state_machine_._dfa[start_state_]->front ();
+ const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
+ FwdIter curr_ = start_token_;
+ bool end_state_ = *ptr_ != 0;
+ std::size_t id_ = *(ptr_ + id_index);
+ FwdIter end_token_ = start_token_;
+
+ while (curr_ != end_)
+ {
+ const std::size_t state_ = ptr_[lookup_[static_cast
+ <typename Traits::index_type>(*curr_++)]];
+
+ if (state_ == 0)
+ {
+ break;
+ }
+
+ ptr_ = &dfa_[state_ * dfa_alphabet_];
+
+ if (*ptr_)
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ start_state_ = *(ptr_ + state_index);
+ end_token_ = curr_;
+ }
+ }
+
+ if (end_state_)
+ {
+ // return longest match
+ start_token_ = end_token_;
+
+ if (id_ == 0) goto again;
+ }
+ else
+ {
+ // No match causes char to be skipped
+ ++start_token_;
+ id_ = npos;
+ }
+
+ return id_;
+ }
+
+ std::size_t next (const std::size_t * const lookup_,
+ const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
+ FwdIter const &start_, FwdIter &start_token_, FwdIter const &end_)
+ {
+ if (start_token_ == end_) return 0;
+
+ bool bol_ = _data.bol;
+ const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
+ FwdIter curr_ = start_token_;
+ bool end_state_ = *ptr_ != 0;
+ std::size_t id_ = *(ptr_ + id_index);
+ bool end_bol_ = bol_;
+ FwdIter end_token_ = start_token_;
+
+ while (curr_ != end_)
+ {
+ const std::size_t BOL_state_ = ptr_[bol_index];
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ if (BOL_state_ && bol_)
+ {
+ ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
+ }
+ else if (EOL_state_ && *curr_ == '\n')
+ {
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+ }
+ else
+ {
+ typename Traits::char_type prev_char_ = *curr_++;
+
+ bol_ = prev_char_ == '\n';
+
+ const std::size_t state_ =
+ ptr_[lookup_[static_cast<typename Traits::index_type>
+ (prev_char_)]];
+
+ if (state_ == 0)
+ {
+ break;
+ }
+
+ ptr_ = &dfa_[state_ * dfa_alphabet_];
+ }
+
+ if (*ptr_)
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ end_bol_ = bol_;
+ end_token_ = curr_;
+ }
+ }
+
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ if (EOL_state_ && curr_ == end_)
+ {
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+
+ if (*ptr_)
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ end_bol_ = bol_;
+ end_token_ = curr_;
+ }
+ }
+
+ if (end_state_)
+ {
+ // return longest match
+ start_token_ = end_token_;
+ _data.bol = end_bol_;
+ }
+ else
+ {
+ // No match causes char to be skipped
+ _data.bol = *start_token_ == '\n';
+ ++start_token_;
+ id_ = npos;
+ }
+
+ return id_;
+ }
+
+ std::size_t next (const std::size_t * const lookup_,
+ const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
+ FwdIter &start_token_, FwdIter const &end_)
+ {
+ if (start_token_ == end_) return 0;
+
+ const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
+ FwdIter curr_ = start_token_;
+ bool end_state_ = *ptr_ != 0;
+ std::size_t id_ = *(ptr_ + id_index);
+ FwdIter end_token_ = start_token_;
+
+ while (curr_ != end_)
+ {
+ const std::size_t state_ = ptr_[lookup_[static_cast
+ <typename Traits::index_type>(*curr_++)]];
+
+ if (state_ == 0)
+ {
+ break;
+ }
+
+ ptr_ = &dfa_[state_ * dfa_alphabet_];
+
+ if (*ptr_)
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ end_token_ = curr_;
+ }
+ }
+
+ if (end_state_)
+ {
+ // return longest match
+ start_token_ = end_token_;
+ }
+ else
+ {
+ // No match causes char to be skipped
+ ++start_token_;
+ id_ = npos;
+ }
+
+ return id_;
+ }
+ };
+
+#if defined _MSC_VER && _MSC_VER <= 1200
+ friend iterator;
+#else
+ friend class iterator;
+#endif
+
+ // Make it explicit that we are NOT taking a copy of state_machine_!
+ basic_input (const basic_state_machine<typename Traits::char_type>
+ *state_machine_, const FwdIter &begin_, const FwdIter &end_) :
+ _state_machine (state_machine_),
+ _begin (begin_),
+ _end (end_)
+ {
+ }
+
+ iterator begin () const
+ {
+ iterator iter_;
+
+ iter_._input = this;
+ iter_._data.id = npos;
+ iter_._data.start = _begin;
+ iter_._data.end = _begin;
+ iter_._data.bol = _state_machine->_seen_BOL_assertion;
+ iter_._data.state = 0;
+ ++iter_;
+ return iter_;
+ }
+
+ iterator end () const
+ {
+ iterator iter_;
+
+ iter_._input = this;
+ iter_._data.start = _end;
+ iter_._data.end = _end;
+ return iter_;
+ }
+
+private:
+ const basic_state_machine<typename Traits::char_type> *_state_machine;
+ FwdIter _begin;
+ FwdIter _end;
+};
+
+typedef basic_input<std::string::iterator> iter_input;
+typedef basic_input<std::wstring::iterator> iter_winput;
+typedef basic_input<const char *> ptr_input;
+typedef basic_input<const wchar_t *> ptr_winput;
+}
+}
+
+#endif

Modified: branches/release/boost/spirit/home/support/detail/lexer/parser/parser.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/parser/parser.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/parser/parser.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // parser.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -20,7 +20,7 @@
 {
 namespace lexer
 {
-namespace internal
+namespace detail
 {
 template<typename CharT>
 class basic_parser
@@ -104,7 +104,7 @@
 
         assert (tree_node_stack_.size () == 1);
 
- node *lhs_node_ = tree_node_stack_.top ();
+ node *lhs_node_ = tree_node_stack_.top ();
 
         tree_node_stack_.pop ();
 
@@ -187,28 +187,38 @@
             repeat (handle_, token_stack_);
             break;
         case token::CHARSET:
- charset (handle_, token_stack_, node_vector_ptr_, tree_node_stack_);
+ charset (handle_, token_stack_, node_vector_ptr_,
+ tree_node_stack_);
             break;
         case token::MACRO:
- macro (handle_, token_stack_, macromap_, node_vector_ptr_, tree_node_stack_);
+ macro (handle_, token_stack_, macromap_, node_vector_ptr_,
+ tree_node_stack_);
             break;
         case token::OPENPAREN:
             openparen (handle_, token_stack_);
             break;
         case token::OPT:
- optional (node_vector_ptr_, tree_node_stack_);
+ case token::AOPT:
+ optional (rhs_._type == token::OPT, node_vector_ptr_,
+ tree_node_stack_);
             token_stack_.push (token::DUP);
             break;
         case token::ZEROORMORE:
- zero_or_more (node_vector_ptr_, tree_node_stack_);
+ case token::AZEROORMORE:
+ zero_or_more (rhs_._type == token::ZEROORMORE, node_vector_ptr_,
+ tree_node_stack_);
             token_stack_.push (token::DUP);
             break;
         case token::ONEORMORE:
- one_or_more (node_vector_ptr_, tree_node_stack_);
+ case token::AONEORMORE:
+ one_or_more (rhs_._type == token::ONEORMORE, node_vector_ptr_,
+ tree_node_stack_);
             token_stack_.push (token::DUP);
             break;
         case token::REPEATN:
- repeatn (handle_.top (), node_vector_ptr_, tree_node_stack_);
+ case token::AREPEATN:
+ repeatn (rhs_._type == token::REPEATN, handle_.top (),
+ node_vector_ptr_, tree_node_stack_);
             token_stack_.push (token::DUP);
             break;
         default:
@@ -243,7 +253,7 @@
         node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
     {
         assert (handle_.top ()._type == token::SUB &&
- (handle_.size () == 1 || handle_.size () == 2));
+ handle_.size () == 1 || handle_.size () == 2);
 
         if (handle_.size () == 1)
         {
@@ -262,7 +272,7 @@
     static void repeat (token_stack &handle_, token_stack &token_stack_)
     {
         assert (handle_.top ()._type == token::REPEAT &&
- (handle_.size () >= 1 && handle_.size () <= 3));
+ handle_.size () >= 1 && handle_.size () <= 3);
 
         if (handle_.size () == 1)
         {
@@ -286,7 +296,7 @@
 
         const size_t id_ = handle_.top ()._id;
 
- node_ptr_vector_->back () = new leaf_node (id_);
+ node_ptr_vector_->back () = new leaf_node (id_, true);
         tree_node_stack_.push (node_ptr_vector_->back ());
         token_stack_.push (token::REPEAT);
     }
@@ -352,7 +362,7 @@
     static void sequence (node_ptr_vector &node_ptr_vector_,
         tree_node_stack &tree_node_stack_)
     {
- node *rhs_ = tree_node_stack_.top ();
+ node *rhs_ = tree_node_stack_.top ();
 
         tree_node_stack_.pop ();
 
@@ -363,15 +373,24 @@
         tree_node_stack_.top () = node_ptr_vector_->back ();
     }
 
- static void optional (node_ptr_vector &node_ptr_vector_,
- tree_node_stack &tree_node_stack_)
+ static void optional (const bool greedy_,
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
     {
         // perform ?
         node *lhs_ = tree_node_stack_.top ();
+ // You don't know if lhs_ is a leaf_node, so get firstpos.
+ node::node_vector &firstpos_ = lhs_->firstpos();
+
+ for (node::node_vector::iterator iter_ = firstpos_.begin (),
+ end_ = firstpos_.end (); iter_ != end_; ++iter_)
+ {
+ // These are leaf_nodes!
+ (*iter_)->greedy (greedy_);
+ }
 
         node_ptr_vector_->push_back (0);
 
- node *rhs_ = new leaf_node (null_token);
+ node *rhs_ = new leaf_node (null_token, greedy_);
 
         node_ptr_vector_->back () = rhs_;
         node_ptr_vector_->push_back (0);
@@ -379,27 +398,27 @@
         tree_node_stack_.top () = node_ptr_vector_->back ();
     }
 
- static void zero_or_more (node_ptr_vector &node_ptr_vector_,
- tree_node_stack &tree_node_stack_)
+ static void zero_or_more (const bool greedy_,
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
     {
         // perform *
         node *ptr_ = tree_node_stack_.top ();
 
         node_ptr_vector_->push_back (0);
- node_ptr_vector_->back () = new iteration_node (ptr_);
+ node_ptr_vector_->back () = new iteration_node (ptr_, greedy_);
         tree_node_stack_.top () = node_ptr_vector_->back ();
     }
 
- static void one_or_more (node_ptr_vector &node_ptr_vector_,
- tree_node_stack &tree_node_stack_)
+ static void one_or_more (const bool greedy_,
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
     {
         // perform +
- node *lhs_ = tree_node_stack_.top ();
+ node *lhs_ = tree_node_stack_.top ();
         node *copy_ = lhs_->copy (node_ptr_vector_);
 
         node_ptr_vector_->push_back (0);
 
- node *rhs_ = new iteration_node (copy_);
+ node *rhs_ = new iteration_node (copy_, greedy_);
 
         node_ptr_vector_->back () = rhs_;
         node_ptr_vector_->push_back (0);
@@ -407,7 +426,7 @@
         tree_node_stack_.top () = node_ptr_vector_->back ();
     }
 
- static void repeatn (const token &token_,
+ static void repeatn (const bool greedy_, const token &token_,
         node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
     {
         // perform {n[,[m]]}
@@ -423,7 +442,7 @@
 
             if (token_._min == 0)
             {
- optional (node_ptr_vector_, tree_node_stack_);
+ optional (greedy_, node_ptr_vector_, tree_node_stack_);
             }
 
             node *prev_ = tree_node_stack_.top ()->copy (node_ptr_vector_);
@@ -431,7 +450,7 @@
 
             for (std::size_t i_ = 2; i_ < top_; ++i_)
             {
- node *temp_ = prev_->copy (node_ptr_vector_);
+ node *temp_ = prev_->copy (node_ptr_vector_);
 
                 curr_ = temp_;
                 tree_node_stack_.push (0);
@@ -444,7 +463,7 @@
             {
                 if (token_._min > 1)
                 {
- node *temp_ = prev_->copy (node_ptr_vector_);
+ node *temp_ = prev_->copy (node_ptr_vector_);
 
                     curr_ = temp_;
                     tree_node_stack_.push (0);
@@ -457,9 +476,9 @@
                 {
                     tree_node_stack_.push (0);
                     tree_node_stack_.top () = prev_;
- optional (node_ptr_vector_, tree_node_stack_);
+ optional (greedy_, node_ptr_vector_, tree_node_stack_);
 
- node *temp_ = tree_node_stack_.top ();
+ node *temp_ = tree_node_stack_.top ();
 
                     tree_node_stack_.pop ();
                     prev_ = temp_;
@@ -481,9 +500,9 @@
                 {
                     tree_node_stack_.push (0);
                     tree_node_stack_.top () = prev_;
- zero_or_more (node_ptr_vector_, tree_node_stack_);
+ zero_or_more (greedy_, node_ptr_vector_, tree_node_stack_);
 
- node *temp_ = tree_node_stack_.top ();
+ node *temp_ = tree_node_stack_.top ();
 
                     prev_ = temp_;
                     tree_node_stack_.pop ();

Modified: branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/num_token.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/num_token.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/num_token.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // num_token.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -7,23 +7,23 @@
 #define BOOST_LEXER_NUM_TOKEN_HPP
 
 #include <boost/config.hpp>
-#include <boost/detail/workaround.hpp>
-
 #include "../../consts.hpp" // null_token
 #include "../../size_t.hpp"
+#include <boost/detail/workaround.hpp>
 
 namespace boost
 {
 namespace lexer
 {
-namespace internal
+namespace detail
 {
 template<typename CharT>
 struct basic_num_token
 {
     enum type {BEGIN, REGEX, OREXP, SEQUENCE, SUB, EXPRESSION, REPEAT,
- DUP, OR, CHARSET, MACRO, OPENPAREN, CLOSEPAREN, OPT,
- ZEROORMORE, ONEORMORE, REPEATN, END};
+ DUP, OR, CHARSET, MACRO, OPENPAREN, CLOSEPAREN, OPT, AOPT,
+ ZEROORMORE, AZEROORMORE, ONEORMORE, AONEORMORE, REPEATN, AREPEATN,
+ END};
 
     type _type;
     std::size_t _id;
@@ -102,42 +102,45 @@
 
 template<typename CharT>
 const char basic_num_token<CharT>::_precedence_table[END + 1][END + 1] = {
-// BEG, REG, ORE, SEQ, SUB, EXP, RPT, DUP, | , CHR, MCR, ( , ) , ? , * , + , {n}, END
-/*BEGIN*/{' ', '<', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', '>'},
-/*REGEX*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', ' ', ' ', ' ', ' ', '>'},
-/*OREXP*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', '>'},
-/* SEQ */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', '>'},
-/* SUB */{' ', ' ', ' ', ' ', ' ', '=', '<', ' ', '>', '<', '<', '<', '>', ' ', ' ', ' ', ' ', '>'},
-/*EXPRE*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', '>'},
-/* RPT */{' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', '>', '<', '<', '<', '<', '>'},
-/*DUPLI*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', '>'},
-/* | */{' ', ' ', ' ', '=', '<', '<', '<', ' ', ' ', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' '},
-/*CHARA*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
-/*MACRO*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
-/* ( */{' ', '=', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' '},
-/* ) */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
-/* ? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', '>'},
-/* * */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', '>'},
-/* + */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', '>'},
-/*{n,m}*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', '>'},
-/* END */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '}
+// BEG, REG, ORE, SEQ, SUB, EXP, RPT, DUP, | , CHR, MCR, ( , ) , ? , ?? , * , *? , + , +?, {n}, {n}?, END
+/*BEGIN*/{' ', '<', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*REGEX*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*OREXP*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* SEQ */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* SUB */{' ', ' ', ' ', ' ', ' ', '=', '<', ' ', '>', '<', '<', '<', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*EXPRE*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* RPT */{' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', '>', '<', '<', '<', '<', '<', '<', '<', '<', '>'},
+/*DUPLI*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* | */{' ', ' ', ' ', '=', '<', '<', '<', ' ', ' ', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '},
+/*CHARA*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/*MACRO*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/* ( */{' ', '=', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '},
+/* ) */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/* ? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* ?? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* * */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* *? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* + */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* +? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*{n,m}*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*{nm}?*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* END */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '}
 };
 
 template<typename CharT>
 const char *basic_num_token<CharT>::_precedence_strings[END + 1] =
-{
 #if BOOST_WORKAROUND(BOOST_INTEL_CXX_VERSION, BOOST_TESTED_AT(910))
- {"BEGIN"}, {"REGEX"}, {"OREXP"}, {"SEQUENCE"}, {"SUB"}, {"EXPRESSION"},
- {"REPEAT"}, {"DUPLICATE"}, {"|"}, {"CHARSET"}, {"MACRO"},
- {"("}, {")"}, {"?"}, {"*"}, {"+"}, {"{n[,[m]]}"}, {"END"}
+{{"BEGIN"}, {"REGEX"}, {"OREXP"}, {"SEQUENCE"}, {"SUB"}, {"EXPRESSION"},
+ {"REPEAT"}, {"DUPLICATE"}, {"|"}, {"CHARSET"}, {"MACRO"},
+ {"("}, {")"}, {"?"}, {"??"}, {"*"}, {"*?"}, {"+"}, {"+?"}, {"{n[,[m]]}"},
+ {"{n[,[m]]}?"}, {"END"}
+}
 #else
- "BEGIN", "REGEX", "OREXP", "SEQUENCE", "SUB", "EXPRESSION",
- "REPEAT", "DUPLICATE", "|", "CHARSET", "MACRO",
- "(", ")", "?", "*", "+", "{n[,[m]]}", "END"
-#endif
-};
-
+{"BEGIN", "REGEX", "OREXP", "SEQUENCE", "SUB", "EXPRESSION", "REPEAT",
+ "DUPLICATE", "|", "CHARSET", "MACRO", "(", ")", "?", "??", "*", "*?",
+ "+", "+?", "{n[,[m]]}", "{n[,[m]]}?", "END"};
 }
+#endif
 }
 }
 

Modified: branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,13 +1,14 @@
 // tokeniser.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 #ifndef BOOST_LEXER_RE_TOKENISER_HPP
 #define BOOST_LEXER_RE_TOKENISER_HPP
 
+// memcpy()
+#include <cstring>
 #include <map>
-#include <cstring> // memcpy
 #include "num_token.hpp"
 #include "../../runtime_error.hpp"
 #include "../../size_t.hpp"
@@ -19,7 +20,7 @@
 {
 namespace lexer
 {
-namespace internal
+namespace detail
 {
 template<typename CharT>
 class basic_re_tokeniser
@@ -93,13 +94,40 @@
                     token_.set (num_token::CLOSEPAREN, null_token);
                     break;
                 case '?':
- token_.set (num_token::OPT, null_token);
+ if (!state_.eos () && *state_._curr == '?')
+ {
+ token_.set (num_token::AOPT, null_token);
+ state_.increment ();
+ }
+ else
+ {
+ token_.set (num_token::OPT, null_token);
+ }
+
                     break;
                 case '*':
- token_.set (num_token::ZEROORMORE, null_token);
+ if (!state_.eos () && *state_._curr == '?')
+ {
+ token_.set (num_token::AZEROORMORE, null_token);
+ state_.increment ();
+ }
+ else
+ {
+ token_.set (num_token::ZEROORMORE, null_token);
+ }
+
                     break;
                 case '+':
- token_.set (num_token::ONEORMORE, null_token);
+ if (!state_.eos () && *state_._curr == '?')
+ {
+ token_.set (num_token::AONEORMORE, null_token);
+ state_.increment ();
+ }
+ else
+ {
+ token_.set (num_token::ONEORMORE, null_token);
+ }
+
                     break;
                 case '{':
                     open_curly (state_, token_);
@@ -223,6 +251,12 @@
         else if (*state_._curr >= '0' && *state_._curr <= '9')
         {
             repeat_n (state_, token_);
+
+ if (!state_.eos () && *state_._curr == '?')
+ {
+ token_._type = num_token::AREPEATN;
+ state_.increment ();
+ }
         }
         else
         {
@@ -391,8 +425,8 @@
                 throw runtime_error ("Unexpected end of regex "
                     "(missing '}').");
             }
- } while (ch_ == '_' || ch_ == '-' || (ch_ >= 'A' && ch_ <= 'Z') ||
- (ch_ >= 'a' && ch_ <= 'z') || (ch_ >= '0' && ch_ <= '9'));
+ } while (ch_ == '_' || ch_ == '-' || ch_ >= 'A' && ch_ <= 'Z' ||
+ ch_ >= 'a' && ch_ <= 'z' || ch_ >= '0' && ch_ <= '9');
 
         if (ch_ != '}')
         {
@@ -422,8 +456,11 @@
         }
 
         token_.set (num_token::MACRO, null_token);
- using namespace std; // some systems have memcpy in namespace std
- memcpy (token_._macro, start_, len_ * sizeof(CharT));
+
+ // Some systems have memcpy in namespace std.
+ using namespace std;
+
+ memcpy (token_._macro, start_, len_ * sizeof (CharT));
         token_._macro[len_] = 0;
     }
 };

Modified: branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,14 +1,14 @@
 // tokeniser_helper.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 #ifndef BOOST_LEXER_RE_TOKENISER_HELPER_H
 #define BOOST_LEXER_RE_TOKENISER_HELPER_H
 
-#include <cstring> // for strlen
-
 #include "../../char_traits.hpp"
+// strlen()
+#include <cstring>
 #include "../../size_t.hpp"
 #include "re_tokeniser_state.hpp"
 
@@ -16,7 +16,7 @@
 {
 namespace lexer
 {
-namespace internal
+namespace detail
 {
 template<typename CharT, typename Traits = char_traits<CharT> >
 class basic_re_tokeniser_helper
@@ -192,7 +192,9 @@
 
         if (str_)
         {
- using namespace std; // some systems have strlen in namespace std
+ // Some systems have strlen in namespace std.
+ using namespace std;
+
             str_len_ = strlen (str_);
         }
         else
@@ -232,7 +234,9 @@
 
         if (str_)
         {
- using namespace std; // some systems have wcslen in namespace std
+ // Some systems have wcslen in namespace std.
+ using namespace std;
+
             str_len_ = wcslen (str_);
         }
         else
@@ -310,32 +314,28 @@
     static CharT decode_octal (state &state_)
     {
         std::size_t accumulator_ = 0;
- CharT ch_ = 0;
+ CharT ch_ = *state_._curr;
         unsigned short count_ = 3;
- bool eos_ = state_.next (ch_);
+ bool eos_ = false;
 
- do
+ for (;;)
         {
             accumulator_ *= 8;
             accumulator_ += ch_ - '0';
             --count_;
+ state_.increment ();
             eos_ = state_.eos ();
 
- if (!eos_)
- {
- ch_ = *state_._curr;
+ if (!count_ || eos_) break;
 
- // Don't consume invalid chars!
- if (ch_ >= '0' && ch_ <= '7')
- {
- state_.increment ();
- }
- else
- {
- eos_ = true;
- }
+ ch_ = *state_._curr;
+
+ // Don't consume invalid chars!
+ if (ch_ < '0' || ch_ > '7')
+ {
+ break;
             }
- } while (!eos_ && count_);
+ }
 
         return static_cast<CharT> (accumulator_);
     }

Modified: branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_state.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_state.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_state.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // tokeniser_state.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -13,7 +13,7 @@
 {
 namespace lexer
 {
-namespace internal
+namespace detail
 {
 template<typename CharT>
 struct basic_re_tokeniser_state

Modified: branches/release/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // end_node.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -13,7 +13,7 @@
 {
 namespace lexer
 {
-namespace internal
+namespace detail
 {
 class end_node : public node
 {

Modified: branches/release/boost/spirit/home/support/detail/lexer/parser/tree/iteration_node.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/parser/tree/iteration_node.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/parser/tree/iteration_node.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // iteration_node.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -12,14 +12,15 @@
 {
 namespace lexer
 {
-namespace internal
+namespace detail
 {
 class iteration_node : public node
 {
 public:
- iteration_node (node *next_) :
+ iteration_node (node *next_, const bool greedy_) :
         node (true),
- _next (next_)
+ _next (next_),
+ _greedy (greedy_)
     {
         node_vector::iterator iter_;
         node_vector::iterator end_;
@@ -32,6 +33,12 @@
         {
             (*iter_)->append_followpos (_firstpos);
         }
+
+ for (iter_ = _firstpos.begin (), end_ = _firstpos.end ();
+ iter_ != end_; ++iter_)
+ {
+ (*iter_)->greedy (greedy_);
+ }
     }
 
     virtual ~iteration_node ()
@@ -54,6 +61,7 @@
 private:
     // Not owner of this pointer...
     node *_next;
+ bool _greedy;
 
     virtual void copy_node (node_ptr_vector &node_ptr_vector_,
         node_stack &new_node_stack_, bool_stack &perform_op_stack_,
@@ -61,10 +69,10 @@
     {
         if (perform_op_stack_.top ())
         {
- node *ptr_ = new_node_stack_.top ();
+ node *ptr_ = new_node_stack_.top ();
 
             node_ptr_vector_->push_back (0);
- node_ptr_vector_->back () = new iteration_node (ptr_);
+ node_ptr_vector_->back () = new iteration_node (ptr_, _greedy);
             new_node_stack_.top () = node_ptr_vector_->back ();
         }
         else

Modified: branches/release/boost/spirit/home/support/detail/lexer/parser/tree/leaf_node.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/parser/tree/leaf_node.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/parser/tree/leaf_node.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // leaf_node.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -14,14 +14,16 @@
 {
 namespace lexer
 {
-namespace internal
+namespace detail
 {
 class leaf_node : public node
 {
 public:
- leaf_node (const std::size_t token_) :
+ leaf_node (const std::size_t token_, const bool greedy_) :
         node (token_ == null_token),
- _token (token_)
+ _token (token_),
+ _set_greedy (!greedy_),
+ _greedy (greedy_)
     {
         if (!_nullable)
         {
@@ -59,6 +61,20 @@
         return _token;
     }
 
+ virtual void greedy (const bool greedy_)
+ {
+ if (!_set_greedy)
+ {
+ _greedy = greedy_;
+ _set_greedy = true;
+ }
+ }
+
+ virtual bool greedy () const
+ {
+ return _greedy;
+ }
+
     virtual const node_vector &followpos () const
     {
         return _followpos;
@@ -71,6 +87,8 @@
 
 private:
     std::size_t _token;
+ bool _set_greedy;
+ bool _greedy;
     node_vector _followpos;
 
     virtual void copy_node (node_ptr_vector &node_ptr_vector_,
@@ -78,7 +96,7 @@
         bool &/*down_*/) const
     {
         node_ptr_vector_->push_back (0);
- node_ptr_vector_->back () = new leaf_node (_token);
+ node_ptr_vector_->back () = new leaf_node (_token, _greedy);
         new_node_stack_.push (node_ptr_vector_->back ());
     }
 };

Modified: branches/release/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // node.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -17,7 +17,7 @@
 {
 namespace lexer
 {
-namespace internal
+namespace detail
 {
 class node
 {
@@ -64,7 +64,7 @@
 
     virtual void append_followpos (const node_vector &/*followpos_*/)
     {
- throw runtime_error ("Internal error node::append_followpos");
+ throw runtime_error ("Internal error node::append_followpos()");
     }
 
     node *copy (node_ptr_vector &node_ptr_vector_) const
@@ -129,27 +129,37 @@
 
     virtual std::size_t id () const
     {
- throw runtime_error ("Internal error node::id");
+ throw runtime_error ("Internal error node::id()");
     }
 
     virtual std::size_t lexer_state () const
     {
- throw runtime_error ("Internal error node::state");
+ throw runtime_error ("Internal error node::state()");
     }
 
     virtual std::size_t token () const
     {
- throw runtime_error ("Internal error node::token");
+ throw runtime_error ("Internal error node::token()");
+ }
+
+ virtual void greedy (const bool /*greedy_*/) // base-class default: always throws; leaf_node overrides this setter
+ {
+ throw runtime_error ("Internal error node::greedy(bool)");
+ }
+
+ virtual bool greedy () const // base-class default: always throws; leaf_node overrides this getter
+ {
+ throw runtime_error ("Internal error node::greedy()");
     }
 
     virtual const node_vector &followpos () const
     {
- throw runtime_error ("Internal error node::followpos");
+ throw runtime_error ("Internal error node::followpos()");
     }
 
     virtual node_vector &followpos ()
     {
- throw runtime_error ("Internal error node::followpos");
+ throw runtime_error ("Internal error node::followpos()");
     }
 
 protected:

Modified: branches/release/boost/spirit/home/support/detail/lexer/parser/tree/selection_node.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/parser/tree/selection_node.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/parser/tree/selection_node.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // selection_node.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -12,7 +12,7 @@
 {
 namespace lexer
 {
-namespace internal
+namespace detail
 {
 class selection_node : public node
 {
@@ -69,7 +69,7 @@
     {
         if (perform_op_stack_.top ())
         {
- node *rhs_ = new_node_stack_.top ();
+ node *rhs_ = new_node_stack_.top ();
 
             new_node_stack_.pop ();
 

Modified: branches/release/boost/spirit/home/support/detail/lexer/parser/tree/sequence_node.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/parser/tree/sequence_node.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/parser/tree/sequence_node.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // sequence_node.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -12,7 +12,7 @@
 {
 namespace lexer
 {
-namespace internal
+namespace detail
 {
 class sequence_node : public node
 {

Modified: branches/release/boost/spirit/home/support/detail/lexer/partition/charset.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/partition/charset.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/partition/charset.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // charset.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -14,7 +14,7 @@
 {
 namespace lexer
 {
-namespace internal
+namespace detail
 {
 template<typename CharT>
 struct basic_charset

Modified: branches/release/boost/spirit/home/support/detail/lexer/partition/equivset.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/partition/equivset.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/partition/equivset.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // equivset.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -15,7 +15,7 @@
 {
 namespace lexer
 {
-namespace internal
+namespace detail
 {
 struct equivset
 {
@@ -25,14 +25,20 @@
     typedef std::vector<node *> node_vector;
 
     index_vector _index_vector;
+ bool _greedy;
+ std::size_t _id;
     node_vector _followpos;
 
- equivset ()
+ equivset () :
+ _greedy (true),
+ _id (0)
     {
     }
 
- equivset (const index_set &index_set_,
- const node_vector &followpos_) :
+ equivset (const index_set &index_set_, const bool greedy_,
+ const std::size_t id_, const node_vector &followpos_) :
+ _greedy (greedy_),
+ _id (id_),
         _followpos (followpos_)
     {
         index_set::const_iterator iter_ = index_set_.begin ();
@@ -55,6 +61,20 @@
 
         if (!overlap_._index_vector.empty ())
         {
+ overlap_._id = _id;
+
+ // LHS abstemious transitions have priority.
+ if (_greedy < rhs_._greedy)
+ {
+ overlap_._greedy = _greedy;
+ }
+ else
+ {
+ overlap_._greedy = _greedy;
+ }
+
+ // Note that the LHS takes priority in order to
+ // respect rule ordering priority in the lex spec.
             overlap_._followpos = _followpos;
 
             node_vector::const_iterator overlap_begin_ =

Modified: branches/release/boost/spirit/home/support/detail/lexer/rules.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/rules.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/rules.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // rules.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -21,7 +21,6 @@
 {
 namespace lexer
 {
-
 namespace detail
 {
     // return name of initial state
@@ -31,13 +30,19 @@
     template <>
     struct initial<char>
     {
- static char const* call() { return "INITIAL"; }
+ static const char *str ()
+ {
+ return "INITIAL";
+ }
     };
 
     template <>
     struct initial<wchar_t>
     {
- static wchar_t const* call() { return L"INITIAL"; }
+ static const wchar_t *str ()
+ {
+ return L"INITIAL";
+ }
     };
 }
 
@@ -57,11 +62,11 @@
     typedef std::pair<string, std::size_t> string_size_t_pair;
 
     basic_rules (const bool case_sensitive_ = true,
- const bool dot_not_newline_ = false) :
+ const bool dot_not_newline_ = true) :
         _case_sensitive (case_sensitive_),
         _dot_not_newline (dot_not_newline_)
     {
- add_state (detail::initial<CharT>::call());
+ add_state (initial ());
     }
 
     void clear ()
@@ -75,8 +80,7 @@
         _case_sensitive = true;
         _locale = std::locale ();
         _dot_not_newline = false;
-
- add_state (detail::initial<CharT>::call());
+ add_state (initial ());
     }
 
     void clear (const CharT *state_name_)
@@ -312,9 +316,9 @@
         return empty_;
     }
 
- static CharT const* initial()
+ static const CharT *initial ()
     {
- return detail::initial<CharT>::call();
+ return detail::initial<CharT>::str ();
     }
 
 private:

Modified: branches/release/boost/spirit/home/support/detail/lexer/runtime_error.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/runtime_error.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/runtime_error.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // runtime_error.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

Modified: branches/release/boost/spirit/home/support/detail/lexer/serialise.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/serialise.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/serialise.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // examples/serialise.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -14,8 +14,8 @@
 namespace lexer
 {
 // IMPORTANT! This won't work if you don't enable RTTI!
-template<class Archive>
-void serialise (Archive &ar_, state_machine &sm_, unsigned int version_)
+template<typename CharT, class Archive>
+void serialise (basic_state_machine<CharT> &sm_, Archive &ar_, unsigned int version_ = 1)
 {
     ar_ & version_;
     ar_ & *sm_._lookup;

Modified: branches/release/boost/spirit/home/support/detail/lexer/size_t.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/size_t.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/size_t.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // size_t.h
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

Modified: branches/release/boost/spirit/home/support/detail/lexer/state_machine.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/state_machine.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/state_machine.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // state_machine.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -7,7 +7,7 @@
 #define BOOST_LEXER_STATE_MACHINE_HPP
 
 #include <algorithm>
-#include "char_state_machine.hpp"
+#include "conversion/char_state_machine.hpp"
 #include "consts.hpp"
 #include <deque>
 #include <map>
@@ -19,29 +19,216 @@
 {
 namespace lexer
 {
-struct state_machine
+template<typename CharT>
+class basic_state_machine
 {
- typedef std::vector<std::size_t> size_t_vector;
- typedef internal::ptr_vector<size_t_vector> size_t_vector_vector;
+public:
+ class iterator
+ {
+ public:
+#if defined _MSC_VER && _MSC_VER <= 1200
+ friend basic_state_machine;
+#else
+ friend class basic_state_machine;
+#endif
 
- size_t_vector_vector _lookup;
- size_t_vector _dfa_alphabet;
- size_t_vector_vector _dfa;
- bool _seen_BOL_assertion;
- bool _seen_EOL_assertion;
+ struct data
+ {
+ // Current iterator info
+ std::size_t dfa;
+ std::size_t states;
+ std::size_t state;
+ std::size_t transitions;
+ std::size_t transition;
+
+ // Current state info
+ bool end_state;
+ std::size_t id;
+ std::size_t goto_dfa;
+ std::size_t bol_index;
+ std::size_t eol_index;
+
+ // Current transition info
+ basic_string_token<CharT> token;
+ std::size_t goto_state;
+
+ data () :
+ dfa (npos),
+ states (0),
+ state (npos),
+ transitions (0),
+ transition (npos),
+ end_state (false),
+ id (npos),
+ goto_dfa (npos),
+ bol_index (npos),
+ eol_index (npos),
+ goto_state (npos)
+ {
+ }
+
+ bool operator == (const data &rhs_) const // member-wise equality over every field of data
+ {
+ return dfa == rhs_.dfa &&
+ states == rhs_.states &&
+ state == rhs_.state &&
+ transitions == rhs_.transitions &&
+ transition == rhs_.transition &&
+ end_state == rhs_.end_state &&
+ id == rhs_.id &&
+ goto_dfa == rhs_.goto_dfa &&
+ bol_index == rhs_.bol_index &&
+ eol_index == rhs_.eol_index &&
+ token == rhs_.token &&
+ goto_state == rhs_.goto_state;
+ }
+ };
+
+ iterator () :
+ _sm (0),
+ _dfas (0),
+ _dfa (npos),
+ _states (0),
+ _state (npos),
+ _transitions (0),
+ _transition (npos)
+ {
+ }
+
+ bool operator == (const iterator &rhs_) const
+ {
+ return _dfas == rhs_._dfas && _dfa == rhs_._dfa &&
+ _states == rhs_._states && _state == rhs_._state &&
+ _transitions == rhs_._transitions &&
+ _transition == rhs_._transition;
+ }
+
+ bool operator != (const iterator &rhs_) const
+ {
+ return !(*this == rhs_);
+ }
+
+ data &operator * ()
+ {
+ return _data;
+ }
+
+ data *operator -> ()
+ {
+ return &_data;
+ }
+
+ // Let compiler generate operator = ().
+
+ // prefix version
+ iterator &operator ++ ()
+ {
+ next ();
+ return *this;
+ }
+
+ // postfix version
+ iterator operator ++ (int)
+ {
+ iterator iter_ = *this;
+
+ next ();
+ return iter_;
+ }
+
+ void clear ()
+ {
+ _dfas = _states = _transitions = 0;
+ _dfa = _state = _transition = npos;
+ }
+
+ private:
+ basic_state_machine *_sm;
+ data _data;
+ std::size_t _dfas;
+ std::size_t _dfa;
+ std::size_t _states;
+ std::size_t _state;
+ std::size_t _transitions;
+ std::size_t _transition;
+ typename detail::basic_char_state_machine<CharT>::state::
+ size_t_string_token_map::const_iterator _token_iter;
+ typename detail::basic_char_state_machine<CharT>::state::
+ size_t_string_token_map::const_iterator _token_end;
+
+ void next ()
+ {
+ bool reset_state_ = false;
+
+ if (_transition >= _transitions)
+ {
+ _transition = _data.transition = 0;
+ _data.state = ++_state;
+ reset_state_ = true;
+
+ if (_state >= _states)
+ {
+ ++_dfa;
+
+ if (_dfa >= _dfas)
+ {
+ clear ();
+ reset_state_ = false;
+ }
+ else
+ {
+ _states = _sm->_csm._sm_vector[_dfa].size ();
+ _state = _data.state = 0;
+ }
+ }
+ }
+ else
+ {
+ _data.transition = _transition;
+ }
+
+ if (reset_state_)
+ {
+ const typename detail::basic_char_state_machine<CharT>::
+ state *ptr_ = &_sm->_csm._sm_vector[_dfa][_state];
 
- state_machine () :
+ _transitions = _data.transitions = ptr_->_transitions.size ();
+ _data.end_state = ptr_->_end_state;
+ _data.id = ptr_->_id;
+ _data.goto_dfa = ptr_->_state;
+ _data.bol_index = ptr_->_bol_index;
+ _data.eol_index = ptr_->_eol_index;
+ _token_iter = ptr_->_transitions.begin ();
+ _token_end = ptr_->_transitions.end ();
+ }
+
+ if (_token_iter != _token_end)
+ {
+ _data.token = _token_iter->second;
+ _data.goto_state = _token_iter->first;
+ ++_token_iter;
+ ++_transition;
+ }
+ else
+ {
+ _data.token.clear ();
+ _data.goto_state = npos;
+ }
+ }
+ };
+
+#if defined _MSC_VER && _MSC_VER <= 1200
+ friend iterator;
+#else
+ friend class iterator;
+#endif
+
+ basic_state_machine () :
         _seen_BOL_assertion (false),
         _seen_EOL_assertion (false)
     {
     }
 
- bool empty () const
- {
- return _lookup->empty () && _dfa_alphabet.empty () &&
- _dfa->empty ();
- }
-
     void clear ()
     {
         _lookup.clear ();
@@ -49,19 +236,104 @@
         _dfa.clear ();
         _seen_BOL_assertion = false;
         _seen_EOL_assertion = false;
+ _csm.clear ();
+ }
+
+ bool empty () const
+ {
+ // Don't include _csm in this test, as irrelevant to state.
+ return _lookup->empty () && _dfa_alphabet.empty () &&
+ _dfa->empty ();
+ }
+
+ std::size_t size () const
+ {
+ return _dfa->size ();
+ }
+
+ bool operator == (const basic_state_machine &rhs_) const
+ {
+ // Don't include _csm in this test, as irrelevant to state.
+ return _lookup == rhs_._lookup &&
+ _dfa_alphabet == rhs_._dfa_alphabet &&
+ _dfa == rhs_._dfa &&
+ _seen_BOL_assertion == rhs_._seen_BOL_assertion &&
+ _seen_EOL_assertion == rhs_._seen_EOL_assertion;
+ }
+
+ iterator begin () const
+ {
+ iterator iter_;
+
+ iter_._sm = const_cast<basic_state_machine *>(this);
+ check_for_csm ();
+
+ if (!_csm.empty())
+ {
+ const typename detail::basic_char_state_machine<CharT>::
+ state_vector *ptr_ = &_csm._sm_vector[0];
+
+ iter_._dfas = _csm._sm_vector.size ();
+ iter_._states = iter_._data.states = ptr_->size ();
+ iter_._transitions = iter_._data.transitions =
+ ptr_->front ()._transitions.size ();
+ iter_._dfa = iter_._data.dfa = 0;
+ iter_._state = iter_._data.state = 0;
+ iter_._transition = 0;
+ iter_._data.end_state = ptr_->front ()._end_state;
+ iter_._data.id = ptr_->front ()._id;
+ iter_._data.goto_dfa = ptr_->front ()._state;
+ iter_._data.bol_index = ptr_->front ()._bol_index;
+ iter_._data.eol_index = ptr_->front ()._eol_index;
+ iter_._token_iter = ptr_->front ()._transitions.begin ();
+ iter_._token_end = ptr_->front ()._transitions.end ();
+ ++iter_;
+ }
+
+ return iter_;
     }
 
- void swap (state_machine &sm_)
+ iterator end () const
+ {
+ iterator iter_;
+
+ iter_._sm = const_cast<basic_state_machine *>(this);
+ return iter_;
+ }
+
+ void swap (basic_state_machine &sm_)
     {
         _lookup->swap (*sm_._lookup);
         _dfa_alphabet.swap (sm_._dfa_alphabet);
         _dfa->swap (*sm_._dfa);
         std::swap (_seen_BOL_assertion, sm_._seen_BOL_assertion);
         std::swap (_seen_EOL_assertion, sm_._seen_EOL_assertion);
+ _csm.swap (sm_._csm);
     }
 
- template<typename CharT>
- void human_readable (basic_char_state_machine<CharT> &sm_) const
+// VC++ 6, 7.1 and 8 can't cope with template friend classes!
+#if !(defined _MSC_VER && _MSC_VER < 1500)
+private:
+#endif
+ typedef std::vector<std::size_t> size_t_vector;
+ typedef detail::ptr_vector<size_t_vector> size_t_vector_vector;
+
+ size_t_vector_vector _lookup;
+ size_t_vector _dfa_alphabet;
+ size_t_vector_vector _dfa;
+ bool _seen_BOL_assertion;
+ bool _seen_EOL_assertion;
+ mutable detail::basic_char_state_machine<CharT> _csm;
+
+ void check_for_csm () const
+ {
+ if (_csm.empty ())
+ {
+ human_readable (_csm);
+ }
+ }
+
+ void human_readable (detail::basic_char_state_machine<CharT> &sm_) const
     {
         const std::size_t max_ = sizeof (CharT) == 1 ?
             num_chars : num_wchar_ts;
@@ -98,8 +370,9 @@
             for (std::size_t state_index_ = 1; state_index_ < states_;
                 ++state_index_)
             {
- typename basic_char_state_machine<CharT>::state *state_ =
- &sm_._sm_vector[start_state_index_][state_index_ - 1];
+ typename detail::basic_char_state_machine<CharT>::state
+ *state_ = &sm_._sm_vector[start_state_index_]
+ [state_index_ - 1];
 
                 state_->_end_state = *read_ptr_ != 0;
                 state_->_id = *(read_ptr_ + id_index);
@@ -116,15 +389,15 @@
                     if (transition_ != 0)
                     {
                         const std::size_t i_ = transition_ - 1;
- typename basic_char_state_machine<CharT>::state::
- size_t_string_token_map::iterator iter_ =
+ typename detail::basic_char_state_machine<CharT>::
+ state::size_t_string_token_map::iterator iter_ =
                             state_->_transitions.find (i_);
 
                         if (iter_ == state_->_transitions.end ())
                         {
                             basic_string_token<CharT> token_
                                 (false, chars_[col_index_]);
- typename basic_char_state_machine<CharT>::
+ typename detail::basic_char_state_machine<CharT>::
                                 state::size_t_string_token_pair pair_
                                 (i_, token_);
 
@@ -137,7 +410,7 @@
                     }
                 }
 
- for (typename basic_char_state_machine<CharT>::state::
+ for (typename detail::basic_char_state_machine<CharT>::state::
                     size_t_string_token_map::iterator iter_ =
                     state_->_transitions.begin (),
                     end_ = state_->_transitions.end ();
@@ -150,7 +423,25 @@
             }
         }
     }
+
+#if !(defined _MSC_VER && _MSC_VER < 1500)
+ template<typename ChT, typename Traits>
+ friend class basic_file_input;
+
+ template<typename ChT, typename Traits>
+ friend class basic_generator;
+
+ template<typename FwdIter, typename Traits>
+ friend class basic_input;
+
+ template<typename ChT, class Archive>
+ friend void serialise (basic_state_machine &sm_, Archive &ar_,
+ unsigned int version_);
+#endif
 };
+
+typedef basic_state_machine<char> state_machine;
+typedef basic_state_machine<wchar_t> wstate_machine;
 }
 }
 

Modified: branches/release/boost/spirit/home/support/detail/lexer/string_token.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/string_token.hpp (original)
+++ branches/release/boost/spirit/home/support/detail/lexer/string_token.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
@@ -1,5 +1,5 @@
 // string_token.hpp
-// Copyright (c) 2007 Ben Hanson
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -49,7 +49,7 @@
 
     void normalise ()
     {
- const std::size_t max_chars_ = sizeof(CharT) == 1 ?
+ const std::size_t max_chars_ = sizeof (CharT) == 1 ?
             num_chars : num_wchar_ts;
 
         if (_charset.length () == max_chars_)
@@ -69,7 +69,7 @@
 
     void negate ()
     {
- const std::size_t max_chars_ = sizeof(CharT) == 1 ?
+ const std::size_t max_chars_ = sizeof (CharT) == 1 ?
             num_chars : num_wchar_ts;
         CharT curr_char_ = sizeof (CharT) == 1 ? -128 : 0;
         string temp_;
@@ -135,7 +135,7 @@
 
     void intersect (basic_string_token &rhs_, basic_string_token &overlap_)
     {
- if ((any () && rhs_.any ()) || (_negated == rhs_._negated &&
+ if (any () && rhs_.any () || (_negated == rhs_._negated &&
             !any () && !rhs_.any ()))
         {
             intersect_same_types (rhs_, overlap_);

Deleted: branches/release/boost/spirit/home/support/detail/lexer/tokeniser.hpp
==============================================================================
--- branches/release/boost/spirit/home/support/detail/lexer/tokeniser.hpp 2008-07-15 17:43:21 EDT (Tue, 15 Jul 2008)
+++ (empty file)
@@ -1,298 +0,0 @@
-// examples/tokeniser.hpp
-// Copyright (c) 2007 Ben Hanson
-//
-// Distributed under the Boost Software License, Version 1.0. (See accompanying
-// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
-#ifndef BOOST_LEXER_EXAMPLES_TOKENISER_HPP
-#define BOOST_LEXER_EXAMPLES_TOKENISER_HPP
-
-#include "char_traits.hpp"
-#include "consts.hpp"
-#include <boost/detail/iterator.hpp>
-#include "size_t.hpp"
-#include "state_machine.hpp"
-
-namespace boost
-{
-namespace lexer
-{
-template<typename Iterator, typename Traits =
- char_traits<typename detail::iterator_traits<Iterator>::value_type> >
-struct basic_tokeniser
-{
- static std::size_t next (const state_machine &state_machine_,
- std::size_t &start_state_, const Iterator &start_,
- Iterator &start_token_, const Iterator &end_)
- {
- if (start_token_ == end_) return 0;
-
- again:
- const std::size_t * lookup_ = &state_machine_._lookup[start_state_]->
- front ();
- std::size_t dfa_alphabet_ = state_machine_._dfa_alphabet[start_state_];
- const std::size_t *dfa_ = &state_machine_._dfa[start_state_]->front ();
- const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
- Iterator curr_ = start_token_;
- bool end_state_ = *ptr_ != 0;
- std::size_t id_ = *(ptr_ + id_index);
- Iterator end_token_ = start_token_;
-
- while (curr_ < end_)
- {
- const std::size_t BOL_state_ = ptr_[bol_index];
- const std::size_t EOL_state_ = ptr_[eol_index];
-
- if (BOL_state_ && (start_token_ == start_ ||
- *(start_token_ - 1) == '\n'))
- {
- ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
- }
- else if (EOL_state_ && *curr_ == '\n')
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
- }
- else
- {
- const std::size_t state_ =
- ptr_[lookup_[static_cast<typename Traits::index_type>
- (*curr_++)]];
-
- if (state_ == 0)
- {
- break;
- }
-
- ptr_ = &dfa_[state_ * dfa_alphabet_];
- }
-
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- start_state_ = *(ptr_ + state_index);
- end_token_ = curr_;
- }
- }
-
- const std::size_t EOL_state_ = ptr_[eol_index];
-
- if (EOL_state_ && curr_ == end_)
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
-
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- start_state_ = *(ptr_ + state_index);
- end_token_ = curr_;
- }
- }
-
- if (end_state_)
- {
- // return longest match
- start_token_ = end_token_;
-
- if (id_ == 0) goto again;
- }
- else
- {
- // No match causes char to be skipped
- ++start_token_;
- id_ = npos;
- }
-
- return id_;
- }
-
- static std::size_t next (const state_machine &state_machine_,
- std::size_t &start_state_, Iterator &start_token_,
- Iterator const &end_)
- {
- if (start_token_ == end_) return 0;
-
- again:
- const std::size_t * lookup_ = &state_machine_._lookup[start_state_]->
- front ();
- std::size_t dfa_alphabet_ = state_machine_._dfa_alphabet[start_state_];
- const std::size_t *dfa_ = &state_machine_._dfa[start_state_]->front ();
- const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
- Iterator curr_ = start_token_;
- bool end_state_ = *ptr_ != 0;
- std::size_t id_ = *(ptr_ + id_index);
- Iterator end_token_ = start_token_;
-
- while (curr_ < end_)
- {
- const std::size_t state_ = ptr_[lookup_[static_cast
- <typename Traits::index_type>(*curr_++)]];
-
- if (state_ == 0)
- {
- break;
- }
-
- ptr_ = &dfa_[state_ * dfa_alphabet_];
-
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- start_state_ = *(ptr_ + state_index);
- end_token_ = curr_;
- }
- }
-
- if (end_state_)
- {
- // return longest match
- start_token_ = end_token_;
-
- if (id_ == 0) goto again;
- }
- else
- {
- // No match causes char to be skipped
- ++start_token_;
- id_ = npos;
- }
-
- return id_;
- }
-
- static std::size_t next (const std::size_t * const lookup_,
- const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
- Iterator const &start_, Iterator &start_token_,
- Iterator const &end_)
- {
- if (start_token_ == end_) return 0;
-
- const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
- Iterator curr_ = start_token_;
- bool end_state_ = *ptr_ != 0;
- std::size_t id_ = *(ptr_ + id_index);
- Iterator end_token_ = start_token_;
-
- while (curr_ < end_)
- {
- const std::size_t BOL_state_ = ptr_[bol_index];
- const std::size_t EOL_state_ = ptr_[eol_index];
-
- if (BOL_state_ && (start_token_ == start_ ||
- *(start_token_ - 1) == '\n'))
- {
- ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
- }
- else if (EOL_state_ && *curr_ == '\n')
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
- }
- else
- {
- const std::size_t state_ =
- ptr_[lookup_[static_cast<typename Traits::index_type>
- (*curr_++)]];
-
- if (state_ == 0)
- {
- break;
- }
-
- ptr_ = &dfa_[state_ * dfa_alphabet_];
- }
-
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- end_token_ = curr_;
- }
- }
-
- const std::size_t EOL_state_ = ptr_[eol_index];
-
- if (EOL_state_ && curr_ == end_)
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
-
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- end_token_ = curr_;
- }
- }
-
- if (end_state_)
- {
- // return longest match
- start_token_ = end_token_;
- }
- else
- {
- // No match causes char to be skipped
- ++start_token_;
- id_ = npos;
- }
-
- return id_;
- }
-
- static std::size_t next (const std::size_t * const lookup_,
- const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
- Iterator &start_token_, Iterator const &end_)
- {
- if (start_token_ == end_) return 0;
-
- const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
- Iterator curr_ = start_token_;
- bool end_state_ = *ptr_ != 0;
- std::size_t id_ = *(ptr_ + id_index);
- Iterator end_token_ = start_token_;
-
- while (curr_ < end_)
- {
- const std::size_t state_ = ptr_[lookup_[static_cast
- <typename Traits::index_type>(*curr_++)]];
-
- if (state_ == 0)
- {
- break;
- }
-
- ptr_ = &dfa_[state_ * dfa_alphabet_];
-
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- end_token_ = curr_;
- }
- }
-
- if (end_state_)
- {
- // return longest match
- start_token_ = end_token_;
- }
- else
- {
- // No match causes char to be skipped
- ++start_token_;
- id_ = npos;
- }
-
- return id_;
- }
-};
-
-typedef basic_tokeniser<std::string::iterator> iter_tokeniser;
-typedef basic_tokeniser<std::wstring::iterator> iter_wtokeniser;
-typedef basic_tokeniser<const char *> ptr_tokeniser;
-typedef basic_tokeniser<const wchar_t *> ptr_wtokeniser;
-
-}
-}
-
-#endif


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk