
Boost-Commit:

Subject: [Boost-commit] svn:boost r53178 - in trunk: boost/spirit/home/lex boost/spirit/home/lex/detail boost/spirit/home/lex/lexer boost/spirit/home/lex/lexer/lexertl boost/spirit/home/support/detail/lexer boost/spirit/home/support/detail/lexer/conversion boost/spirit/home/support/detail/lexer/parser boost/spirit/home/support/detail/lexer/parser/tree libs/spirit/example/lex/static_lexer
From: hartmut.kaiser_at_[hidden]
Date: 2009-05-22 12:34:32


Author: hkaiser
Date: 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
New Revision: 53178
URL: http://svn.boost.org/trac/boost/changeset/53178

Log:
Spirit: Updated Spirit.Lex to support unique ids provided by the underlying lexertl implementation. Speeds up things quite a bit...
Text files modified:
   trunk/boost/spirit/home/lex/detail/sequence_function.hpp | 22 +++++
   trunk/boost/spirit/home/lex/lexer/action.hpp | 7 ++
   trunk/boost/spirit/home/lex/lexer/char_token_def.hpp | 9 +
   trunk/boost/spirit/home/lex/lexer/lexer.hpp | 12 +++
   trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp | 128 +++++++++++++++------------------------
   trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp | 111 +++++++++++++++++-----------------
   trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp | 88 ++++-----------------------
   trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp | 89 +++++++++++++++------------
   trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp | 76 ++++++++++++-----------
   trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp | 9 ++
   trunk/boost/spirit/home/lex/lexer/sequence.hpp | 11 ++
   trunk/boost/spirit/home/lex/lexer/string_token_def.hpp | 3
   trunk/boost/spirit/home/lex/lexer/token_def.hpp | 7 +
   trunk/boost/spirit/home/lex/lexer/token_set.hpp | 36 +++++++---
   trunk/boost/spirit/home/lex/reference.hpp | 8 ++
   trunk/boost/spirit/home/support/detail/lexer/consts.hpp | 8 +-
   trunk/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp | 2
   trunk/boost/spirit/home/support/detail/lexer/debug.hpp | 12 +++
   trunk/boost/spirit/home/support/detail/lexer/file_input.hpp | 31 +++++++--
   trunk/boost/spirit/home/support/detail/lexer/generator.hpp | 27 +++++---
   trunk/boost/spirit/home/support/detail/lexer/input.hpp | 70 +++++++++++++++++----
   trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp | 10 +-
   trunk/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp | 10 ++
   trunk/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp | 5 +
   trunk/boost/spirit/home/support/detail/lexer/rules.hpp | 127 +++++++++++++++++++++++++++++++++------
   trunk/boost/spirit/home/support/detail/lexer/state_machine.hpp | 6 +
   trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp | 101 +++++++++++++++++--------------
   27 files changed, 607 insertions(+), 418 deletions(-)
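
Before the per-file diffs, a note on the central change. The dynamic lexer used to keep its semantic actions in a std::multimap keyed by (token id, lexer state) and ran an equal_range lookup for every matched token; with lexertl now reporting the rule's unique id (its position within the lexer state), the actions can be held in a vector of vectors and dispatched with two plain array indexings. The following stand-alone sketch contrasts the two dispatch strategies; all names are illustrative, this is not the Spirit code itself:

    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <map>
    #include <utility>
    #include <vector>

    // Illustrative action type: receives the token id and may veto the match.
    typedef std::function<void(std::size_t, bool&)> action_type;

    // Old scheme (sketch): actions keyed by (id, state), located via equal_range.
    typedef std::multimap<std::pair<std::size_t, std::size_t>, action_type>
        map_actions;

    bool invoke_map(map_actions const& acts, std::size_t id, std::size_t state)
    {
        bool match = true;
        std::pair<map_actions::const_iterator, map_actions::const_iterator> p =
            acts.equal_range(std::make_pair(id, state));
        for (; p.first != p.second && match; ++p.first)
            p.first->second(id, match);
        return match;
    }

    // New scheme (sketch): actions indexed directly as actions[state][unique_id].
    typedef std::vector<std::vector<action_type> > vector_actions;

    bool invoke_vector(vector_actions const& acts, std::size_t id,
        std::size_t state, std::size_t unique_id)
    {
        if (state >= acts.size())
            return true;                          // no action defined for this state
        std::vector<action_type> const& a = acts[state];
        if (unique_id >= a.size() || !a[unique_id])
            return true;                          // nothing attached to this rule
        bool match = true;
        a[unique_id](id, match);                  // at most one action per rule
        return match;
    }

    int main()
    {
        vector_actions acts(1);
        acts[0].resize(3);
        acts[0][2] = [](std::size_t id, bool& match) {
            std::cout << "action for token " << id << "\n";
            match = true;
        };
        // token id 42 was produced by the rule with unique id 2 in state 0
        std::cout << (invoke_vector(acts, 42, 0, 2) ? "match" : "no-match") << "\n";
    }

The trade-off is memory for speed: each per-state vector grows to the largest unique id that carries an action, but the per-token lookup becomes constant time, which is what the log message refers to.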

Modified: trunk/boost/spirit/home/lex/detail/sequence_function.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/detail/sequence_function.hpp (original)
+++ trunk/boost/spirit/home/lex/detail/sequence_function.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -16,22 +16,38 @@
 namespace boost { namespace spirit { namespace lex { namespace detail
 {
     template <typename LexerDef, typename String>
- struct sequence_function
+ struct sequence_collect_function
     {
- sequence_function(LexerDef& def_, String const& state_)
+ sequence_collect_function(LexerDef& def_, String const& state_)
           : def(def_), state(state_) {}
 
         template <typename Component>
         bool operator()(Component const& component) const
         {
             component.collect(def, state);
- return false; // execute for all sequence elements
+ return false; // execute for all sequence elements
         }
 
         LexerDef& def;
         String const& state;
     };
 
+ template <typename LexerDef>
+ struct sequence_add_actions_function
+ {
+ sequence_add_actions_function(LexerDef& def_)
+ : def(def_) {}
+
+ template <typename Component>
+ bool operator()(Component const& component) const
+ {
+ component.add_actions(def);
+ return false; // execute for all sequence elements
+ }
+
+ LexerDef& def;
+ };
+
 }}}}
 
 #endif

Modified: trunk/boost/spirit/home/lex/lexer/action.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/action.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/action.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -32,6 +32,13 @@
             // first collect the token definition information for the token_def
             // this action is attached to
             subject.collect(lexdef, state);
+ }
+
+ template <typename LexerDef>
+ void add_actions(LexerDef& lexdef) const
+ {
+ // call to add all actions attached further down the hierarchy
+ subject.add_actions(lexdef);
 
             // retrieve the id of the associated token_def and register the
             // given semantic action with the lexer instance

Modified: trunk/boost/spirit/home/lex/lexer/char_token_def.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/char_token_def.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/char_token_def.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -61,18 +61,21 @@
     {
         typedef typename CharEncoding::char_type char_type;
 
- char_token_def(char_type ch) : ch (ch) {}
+ char_token_def(char_type ch) : ch(ch), unique_id_(~0) {}
 
         template <typename LexerDef, typename String>
         void collect(LexerDef& lexdef, String const& state) const
         {
- lexdef.add_token (state.c_str(), ch,
- static_cast<std::size_t>(ch));
+ lexdef.add_token (state.c_str(), ch, static_cast<std::size_t>(ch));
         }
 
+ template <typename LexerDef>
+ void add_actions(LexerDef& lexdef) const {}
+
         std::size_t id() const { return static_cast<std::size_t>(ch); }
 
         char_type ch;
+ mutable std::size_t unique_id_;
     };
 
     ///////////////////////////////////////////////////////////////////////////

Modified: trunk/boost/spirit/home/lex/lexer/lexer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexer.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexer.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -197,12 +197,22 @@
             };
             friend struct pattern_adder;
 
+ private:
+ // Helper function to invoke the necessary 2 step compilation
+ // process on token definition expressions
+ template <typename TokenExpr>
+ void compile2pass(TokenExpr const& expr)
+ {
+ expr.collect(def, state);
+ expr.add_actions(def);
+ }
+
         public:
             ///////////////////////////////////////////////////////////////////
             template <typename Expr>
             void define(Expr const& expr)
             {
- compile<lex::domain>(expr).collect(def, state);
+ compile2pass(compile<lex::domain>(expr));
             }
 
             lexer_def_(LexerDef& def_, string_type const& state_)
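
The define() member above now drives a two-pass compilation: collect() registers every token definition with the lexer (the point at which ids and unique ids come into existence), and a second traversal, add_actions(), attaches the semantic actions once those ids are known. A minimal sketch of the pattern, with made-up names standing in for the Spirit components:

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <vector>

    // Hypothetical lexer definition: pass 1 records rules, pass 2 binds actions.
    struct lexer_def
    {
        std::size_t add_token(std::string const& regex)
        {
            defs.push_back(regex);
            return defs.size() - 1;          // serves as the rule's unique id
        }
        void add_action(std::size_t unique_id, std::string const& name)
        {
            std::cout << "action '" << name << "' bound to rule #" << unique_id << "\n";
        }
        std::vector<std::string> defs;
    };

    // Hypothetical component: knows its regex and (optionally) an action name.
    struct token_component
    {
        token_component(std::string const& r, std::string const& a)
          : regex(r), action_name(a), unique_id(std::size_t(~0)) {}

        void collect(lexer_def& def) const          // pass 1
        { unique_id = def.add_token(regex); }

        void add_actions(lexer_def& def) const      // pass 2
        { if (!action_name.empty()) def.add_action(unique_id, action_name); }

        std::string regex, action_name;
        mutable std::size_t unique_id;
    };

    // Mirrors compile2pass() above: ids exist only after the first pass.
    template <typename Expr>
    void compile2pass(lexer_def& def, Expr const& expr)
    {
        expr.collect(def);
        expr.add_actions(def);
    }

    int main()
    {
        lexer_def def;
        compile2pass(def, token_component("[0-9]+", "on_number"));
    }

This is why every component in this commit (token_def, char_token_def, string_token_def, token_set, sequence, action, reference) gains an add_actions() member, even if it is empty.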

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -57,14 +57,15 @@
               , rules(data_.rules_)
               , first(first_), last(last_) {}
 
- std::size_t next(Iterator& end)
+ std::size_t next(Iterator& end, std::size_t& unique_id)
             {
                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
- return tokenizer::next(state_machine, first, end, last);
+ return tokenizer::next(state_machine, first, end, last, unique_id);
             }
 
             // nothing to invoke, so this is empty
- bool invoke_actions(std::size_t, std::size_t, Iterator const&)
+ bool invoke_actions(std::size_t, std::size_t, std::size_t
+ , Iterator const&)
             {
                 return true; // always accept
             }
@@ -96,11 +97,11 @@
             data (IterData const& data_, Iterator& first_, Iterator const& last_)
               : base_type(data_, first_, last_), state(0) {}
 
- std::size_t next(Iterator& end)
+ std::size_t next(Iterator& end, std::size_t& unique_id)
             {
                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
                 return tokenizer::next(this->state_machine, state,
- this->first, end, this->last);
+ this->first, end, this->last, unique_id);
             }
 
             std::size_t& get_state() { return state; }
@@ -135,60 +136,36 @@
 
             typedef void functor_type(iterpair_type, std::size_t, bool&, data&);
             typedef boost::function<functor_type> functor_wrapper_type;
- struct action_key
- {
- action_key(std::size_t id, std::size_t state)
- : id_(id), state_(state) {}
- action_key(std::pair<std::size_t, std::size_t> const& k)
- : id_(k.first), state_(k.second) {}
-
- friend bool operator<(action_key const& lhs, action_key const& rhs)
- {
- return lhs.id_ < rhs.id_ ||
- (lhs.id_ == rhs.id_ && lhs.state_ < rhs.state_);
- }
-
- std::size_t id_;
- std::size_t state_;
- };
- typedef std::multimap<action_key, functor_wrapper_type>
+ typedef std::vector<std::vector<functor_wrapper_type> >
                 semantic_actions_type;
 
- typedef detail::wrap_action<functor_wrapper_type, iterpair_type, data>
- wrap_action_type;
+ typedef detail::wrap_action<functor_wrapper_type
+ , iterpair_type, data> wrap_action_type;
 
             template <typename IterData>
             data (IterData const& data_, Iterator& first_, Iterator const& last_)
               : base_type(data_, first_, last_)
- , actions(data_.actions_) {}
+ , actions_(data_.actions_) {}
 
             // invoke attached semantic actions, if defined
- bool invoke_actions(std::size_t id, std::size_t state
- , Iterator const& end)
+ bool invoke_actions(std::size_t state, std::size_t id
+ , std::size_t unique_id, Iterator const& end)
             {
- if (actions.empty())
- return true; // nothing to invoke, continue with 'match'
+ if (state >= actions_.size())
+ return true; // no action defined for this state
 
- iterpair_type itp(this->first, end);
- bool match = true;
+ std::vector<functor_wrapper_type> const& actions = actions_[state];
 
- typedef typename semantic_actions_type::const_iterator
- iterator_type;
+ if (unique_id >= actions.size() || !actions[unique_id])
+ return true; // nothing to invoke, continue with 'match'
 
- std::pair<iterator_type, iterator_type> p =
- actions.equal_range(action_key(id, state));
-
- while (p.first != p.second)
- {
- ((*p.first).second)(itp, id, match, *this);
- if (!match)
- return false; // return a 'no-match'
- ++p.first;
- }
- return true; // normal execution
+ iterpair_type itp(this->first, end);
+ bool match = true;
+ actions[unique_id](itp, id, match, *this);
+ return match;
             }
 
- semantic_actions_type const& actions;
+ semantic_actions_type const& actions_;
         };
     }
 
@@ -297,7 +274,8 @@
 #endif
 
             Iterator end = data.first;
- std::size_t id = data.next(end);
+ std::size_t unique_id = boost::lexer::npos;
+ std::size_t id = data.next(end, unique_id);
 
             if (boost::lexer::npos == id) { // no match
 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
@@ -309,47 +287,43 @@
                 std::cerr << "Not matched, in state: " << data.state
                           << ", lookahead: >" << next << "<" << std::endl;
 #endif
- result = result_type(0);
+ return result = result_type(0);
             }
             else if (0 == id) { // EOF reached
 #if defined(BOOST_SPIRIT_STATIC_EOF)
- result = eof;
+ return result = eof;
 #else
- result = mp.ftor.eof;
+ return result = mp.ftor.eof;
 #endif
             }
- else {
+
 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
- {
- std::string next;
- Iterator it = end;
- for (std::size_t i = 0; i < 10 && it != data.last; ++it, ++i)
- next += *it;
-
- std::cerr << "Matched: " << id << ", in state: "
- << data.state << ", string: >"
- << std::basic_string<char_type>(data.first, end) << "<"
- << ", lookahead: >" << next << "<" << std::endl;
- }
+ {
+ std::string next;
+ Iterator it = end;
+ for (std::size_t i = 0; i < 10 && it != data.last; ++it, ++i)
+ next += *it;
+
+ std::cerr << "Matched: " << id << ", in state: "
+ << data.state << ", string: >"
+ << std::basic_string<char_type>(data.first, end) << "<"
+ << ", lookahead: >" << next << "<" << std::endl;
+ }
 #endif
- // invoke_actions might change state
- std::size_t state = data.get_state();
+ // invoke_actions might change state
+ std::size_t state = data.get_state();
 
- // invoke attached semantic actions, if defined
- if (!data.invoke_actions(id, state, end))
- {
- // one of the semantic actions signaled no-match
- result = result_type(0);
- }
- else
- {
- // return matched token, advancing 'data.first' past the
- // matched sequence
- assign_on_exit<Iterator> on_exit(data.first, end);
- result = result_type(id, state, data.first, end);
- }
+ // invoke attached semantic actions, if defined
+ if (!data.invoke_actions(state, id, unique_id, end))
+ {
+ // one of the semantic actions signaled no-match
+ return result = result_type(0);
             }
- return result;
+
+ // return matched token, advancing 'data.first' past the
+ // matched sequence
+ assign_on_exit<Iterator> on_exit(data.first, end);
+ return result = result_type(id, state, data.first, end);
         }
 
         // set_state are propagated up to the iterator interface, allowing to
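
Throughout functor.hpp the tokenizer's next() still returns the token id but now also writes the matched rule's unique id into an out parameter (boost::lexer::npos when nothing matched), and that value is handed straight to invoke_actions(). A compressed sketch of the caller-side flow, with illustrative stand-ins for data.next() and invoke_actions():

    #include <cstddef>
    #include <iostream>

    std::size_t const npos = static_cast<std::size_t>(~0);

    // Stand-in for the tokenizer: returns the token id and reports the rule's
    // position via the out parameter (both npos on failure, id 0 on EOF).
    std::size_t next_token(std::size_t& unique_id)
    {
        unique_id = 2;          // pretend rule #2 matched
        return 42;              // pretend it carries token id 42
    }

    bool invoke_actions(std::size_t state, std::size_t id, std::size_t unique_id)
    {
        std::cout << "running action [" << state << "][" << unique_id
                  << "] for token " << id << "\n";
        return true;            // the action accepted the match
    }

    int main()
    {
        std::size_t unique_id = npos;
        std::size_t id = next_token(unique_id);

        if (npos == id) { std::cout << "no match\n"; return 0; }
        if (0 == id)    { std::cout << "EOF\n";      return 0; }

        std::size_t state = 0;                       // actions may change this
        if (!invoke_actions(state, id, unique_id))
            std::cout << "action vetoed the match\n";
        else
            std::cout << "token " << id << " accepted\n";
    }

The early returns mirror the restructuring of the hunk above, which flattens the former if/else chain into a sequence of returns.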

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -68,6 +68,10 @@
         }
         os_ << "};\n\n";
 
+ os_ << "// this variable defines the number of lexer states\n";
+ os_ << "std::size_t const lexer_state_count = "
+ << rules_.statemap().size() << ";\n\n";
+
         return true;
     }
 
@@ -137,11 +141,12 @@
             os_ << "\n ";
         }
 
- os_ << "Iterator &start_token_, Iterator const& end_)\n";
+ os_ << "Iterator &start_token_, Iterator const& end_, ";
+ os_ << "std::size_t& unique_id_)\n";
         os_ << "{\n";
- os_ << " enum {end_state_index, id_index, state_index, bol_index, "
- "eol_index,\n";
- os_ << " dead_state_index, dfa_offset};\n";
+ os_ << " enum {end_state_index, id_index, unique_id_index, "
+ "state_index, bol_index,\n";
+ os_ << " eol_index, dead_state_index, dfa_offset};\n";
         os_ << " static const std::size_t npos = static_cast"
           "<std::size_t>(~0);\n";
 
@@ -156,7 +161,7 @@
                 std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front();
 
                 os_ << " static const std::size_t lookup" << state_
- << "_[" << lookups_ << "] = {";
+ << "_[" << lookups_ << "] = {\n ";
 
                 for (/**/; i_ < count_; ++i_)
                 {
@@ -171,16 +176,16 @@
 
                     if (i_ < count_ - 1)
                     {
- os_ << "," << std::endl << " ";
+ os_ << ",\n ";
                     }
 
                     j_ = 1;
                 }
 
- os_ << "};\n";
+ os_ << " };\n";
                 count_ = sm_.data()._dfa[state_]->size ();
                 os_ << " static const std::size_t dfa" << state_ << "_[" <<
- count_ << "] = {";
+ count_ << "] = {\n ";
                 count_ /= 8;
 
                 for (i_ = 0; i_ < count_; ++i_)
@@ -196,7 +201,7 @@
 
                     if (i_ < count_ - 1)
                     {
- os_ << "," << std::endl << " ";
+ os_ << ",\n ";
                     }
                 }
 
@@ -218,25 +223,23 @@
                     }
                 }
 
- os_ << "};\n";
+ os_ << " };\n";
             }
 
             std::size_t count_ = sm_.data()._dfa_alphabet.size();
             std::size_t i_ = 1;
 
             os_ << " static const std::size_t *lookup_arr_[" << count_ <<
- "] = {";
-
- os_ << "lookup0_";
+ "] = { lookup0_";
 
             for (i_ = 1; i_ < count_; ++i_)
             {
                 os_ << ", " << "lookup" << i_ << "_";
             }
 
- os_ << "};\n";
+ os_ << " };\n";
             os_ << " static const std::size_t dfa_alphabet_arr_[" <<
- count_ << "] = {";
+ count_ << "] = { ";
 
             os_ << sm_.data()._dfa_alphabet.front ();
 
@@ -245,9 +248,9 @@
                 os_ << ", " << sm_.data()._dfa_alphabet[i_];
             }
 
- os_ << "};\n";
+ os_ << " };\n";
             os_ << " static const std::size_t *dfa_arr_[" << count_ <<
- "] = {";
+ "] = { ";
             os_ << "dfa0_";
 
             for (i_ = 1; i_ < count_; ++i_)
@@ -255,7 +258,7 @@
                 os_ << ", " << "dfa" << i_ << "_";
             }
 
- os_ << "};\n";
+ os_ << " };\n";
         }
         else
         {
@@ -266,7 +269,7 @@
             std::size_t count_ = lookups_ / 8;
 
             os_ << " static const std::size_t lookup_[";
- os_ << sm_.data()._lookup[0]->size() << "] = {";
+ os_ << sm_.data()._lookup[0]->size() << "] = {\n ";
 
             for (; i_ < count_; ++i_)
             {
@@ -281,17 +284,17 @@
 
                 if (i_ < count_ - 1)
                 {
- os_ << "," << std::endl << " ";
+ os_ << ",\n ";
                 }
 
                 j_ = 1;
             }
 
- os_ << "};\n";
+ os_ << " };\n";
             os_ << " static const std::size_t dfa_alphabet_ = " <<
               sm_.data()._dfa_alphabet.front () << ";\n";
             os_ << " static const std::size_t dfa_[" <<
- sm_.data()._dfa[0]->size () << "] = {";
+ sm_.data()._dfa[0]->size () << "] = {\n ";
                 count_ = sm_.data()._dfa[0]->size () / 8;
 
             for (i_ = 0; i_ < count_; ++i_)
@@ -307,7 +310,7 @@
 
                 if (i_ < count_ - 1)
                 {
- os_ << "," << std::endl << " ";
+ os_ << ",\n ";
                 }
             }
 
@@ -330,7 +333,7 @@
                 }
             }
 
- os_ << "};\n";
+ os_ << " };\n";
         }
 
         os_ << "\n if (start_token_ == end_) return 0;\n\n";
@@ -347,24 +350,20 @@
         os_ << " Iterator curr_ = start_token_;\n";
         os_ << " bool end_state_ = *ptr_ != 0;\n";
         os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
- os_ << " Iterator end_token_ = start_token_;\n";
- os_ << '\n';
+ os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
+ os_ << " Iterator end_token_ = start_token_;\n\n";
+
         os_ << " while (curr_ != end_)\n";
         os_ << " {\n";
 
         if (sm_.data()._seen_BOL_assertion)
         {
- os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n";
+ os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n\n";
         }
 
         if (sm_.data()._seen_EOL_assertion)
         {
- os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n";
- }
-
- if (sm_.data()._seen_BOL_assertion || sm_.data()._seen_EOL_assertion)
- {
- os_ << '\n';
+ os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
         }
 
         if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
@@ -396,7 +395,7 @@
             os_ << " if (state_ == 0) break;\n";
             os_ << '\n';
             os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
- os_ << " }\n";
+ os_ << " }\n\n";
         }
         else if (sm_.data()._seen_BOL_assertion)
         {
@@ -411,8 +410,8 @@
 
             if (lookups_ == 256)
             {
- os_ << " ptr_[lookup_[static_cast<unsigned char>\n";
- os_ << " (*curr_++)]];\n";
+ os_ << " ptr_[lookup_[static_cast<unsigned char>"
+ "(*curr_++)]];\n";
             }
             else
             {
@@ -423,7 +422,7 @@
             os_ << " if (state_ == 0) break;\n";
             os_ << '\n';
             os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
- os_ << " }\n";
+ os_ << " }\n\n";
         }
         else if (sm_.data()._seen_EOL_assertion)
         {
@@ -437,8 +436,8 @@
 
             if (lookups_ == 256)
             {
- os_ << " ptr_[lookup_[static_cast<unsigned char>\n";
- os_ << " (*curr_++)]];\n";
+ os_ << " ptr_[lookup_[static_cast<unsigned char>"
+ "(*curr_++)]];\n";
             }
             else
             {
@@ -449,7 +448,7 @@
             os_ << " if (state_ == 0) break;\n";
             os_ << '\n';
             os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
- os_ << " }\n";
+ os_ << " }\n\n";
         }
         else
         {
@@ -457,8 +456,8 @@
 
             if (lookups_ == 256)
             {
- os_ << " ptr_[lookup_[static_cast<unsigned char>\n";
- os_ << " (*curr_++)]];\n";
+ os_ << " ptr_[lookup_[static_cast<unsigned char>"
+ "(*curr_++)]];\n";
             }
             else
             {
@@ -468,14 +467,14 @@
             os_ << '\n';
             os_ << " if (state_ == 0) break;\n";
             os_ << '\n';
- os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+ os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n";
         }
 
- os_ << '\n';
         os_ << " if (*ptr_)\n";
         os_ << " {\n";
         os_ << " end_state_ = true;\n";
         os_ << " id_ = *(ptr_ + id_index);\n";
+ os_ << " uid_ = *(ptr_ + unique_id_index);\n";
 
         if (dfas_ > 1)
         {
@@ -484,21 +483,21 @@
 
         os_ << " end_token_ = curr_;\n";
         os_ << " }\n";
- os_ << " }\n";
- os_ << '\n';
+ os_ << " }\n\n";
 
         if (sm_.data()._seen_EOL_assertion)
         {
- os_ << " const std::size_t EOL_state_ = ptr_[eol_index];\n";
- os_ << '\n';
+ os_ << " const std::size_t EOL_state_ = ptr_[eol_index];\n\n";
+
             os_ << " if (EOL_state_ && curr_ == end_)\n";
             os_ << " {\n";
- os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
- os_ << '\n';
+ os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n";
+
             os_ << " if (*ptr_)\n";
             os_ << " {\n";
             os_ << " end_state_ = true;\n";
             os_ << " id_ = *(ptr_ + id_index);\n";
+ os_ << " uid_ = *(ptr_ + unique_id_index);\n";
 
             if (dfas_ > 1)
             {
@@ -507,8 +506,7 @@
 
             os_ << " end_token_ = curr_;\n";
             os_ << " }\n";
- os_ << " }\n";
- os_ << '\n';
+ os_ << " }\n\n";
         }
 
         os_ << " if (end_state_)\n";
@@ -518,7 +516,6 @@
 
         if (dfas_ > 1)
         {
- os_ << '\n';
             os_ << " if (id_ == 0) goto again;\n";
         }
 
@@ -533,8 +530,10 @@
         }
 
         os_ << " id_ = npos;\n";
- os_ << " }\n";
- os_ << '\n';
+ os_ << " uid_ = npos;\n";
+ os_ << " }\n\n";
+
+ os_ << " unique_id_ = uid_;\n";
         os_ << " return id_;\n";
         os_ << "}\n\n";
 
@@ -555,7 +554,7 @@
     {
         if (!lexer.init_dfa())
             return false;
- return detail::generate_cpp(lexer.state_machine, lexer.rules, os
+ return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os
           , name_suffix, false, false);
     }
 

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -24,82 +24,11 @@
         typedef typename boost::detail::iterator_traits<Iterator>::value_type
             char_type;
 
-// static std::size_t next (const std::size_t * const lookup_,
-// std::size_t const dfa_alphabet_, const std::size_t * const dfa_,
-// Iterator const& start_, Iterator &start_token_,
-// Iterator const& end_)
-// {
-// if (start_token_ == end_) return 0;
-//
-// const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
-// Iterator curr_ = start_token_;
-// bool end_state_ = *ptr_ != 0;
-// std::size_t id_ = *(ptr_ + lexer::id_index);
-// Iterator end_token_ = start_token_;
-//
-// while (curr_ != end_)
-// {
-// std::size_t const BOL_state_ = ptr_[lexer::bol_index];
-// std::size_t const EOL_state_ = ptr_[lexer::eol_index];
-//
-// if (BOL_state_ && (start_token_ == start_ ||
-// *(start_token_ - 1) == '\n'))
-// {
-// ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
-// }
-// else if (EOL_state_ && *curr_ == '\n')
-// {
-// ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
-// }
-// else
-// {
-// std::size_t const state_ = ptr_[lookup_[*curr_++]];
-//
-// if (state_ == 0)
-// {
-// break;
-// }
-//
-// ptr_ = &dfa_[state_ * dfa_alphabet_];
-// }
-//
-// if (*ptr_)
-// {
-// end_state_ = true;
-// id_ = *(ptr_ + lexer::id_index);
-// end_token_ = curr_;
-// }
-// }
-//
-// const std::size_t EOL_state_ = ptr_[lexer::eol_index];
-//
-// if (EOL_state_ && curr_ == end_)
-// {
-// ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
-//
-// if (*ptr_)
-// {
-// end_state_ = true;
-// id_ = *(ptr_ + lexer::id_index);
-// end_token_ = curr_;
-// }
-// }
-//
-// if (end_state_) {
-// // return longest match
-// start_token_ = end_token_;
-// }
-// else {
-// id_ = lexer::npos;
-// }
-//
-// return id_;
-// }
-
         static std::size_t next (
             boost::lexer::basic_state_machine<char_type> const& state_machine_
           , std::size_t &dfa_state_, Iterator const& start_
- , Iterator &start_token_, Iterator const& end_)
+ , Iterator &start_token_, Iterator const& end_
+ , std::size_t& unique_id_)
         {
             if (start_token_ == end_) return 0;
 
@@ -112,6 +41,7 @@
             Iterator curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + boost::lexer::id_index);
+ std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
             Iterator end_token_ = start_token_;
 
             while (curr_ != end_)
@@ -154,6 +84,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
+ uid_ = *(ptr_ + boost::lexer::unique_id_index);
                     dfa_state_ = *(ptr_ + boost::lexer::state_index);
                     end_token_ = curr_;
                 }
@@ -169,6 +100,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
+ uid_ = *(ptr_ + boost::lexer::unique_id_index);
                     dfa_state_ = *(ptr_ + boost::lexer::state_index);
                     end_token_ = curr_;
                 }
@@ -183,15 +115,18 @@
             }
             else {
                 id_ = boost::lexer::npos;
+ uid_ = boost::lexer::npos;
             }
 
+ unique_id_ = uid_;
             return id_;
         }
 
         ///////////////////////////////////////////////////////////////////////
         static std::size_t next (
             boost::lexer::basic_state_machine<char_type> const& state_machine_
- , Iterator const& start_, Iterator &start_token_, Iterator const& end_)
+ , Iterator const& start_, Iterator &start_token_, Iterator const& end_
+ , std::size_t& unique_id_)
         {
             if (start_token_ == end_) return 0;
 
@@ -202,6 +137,7 @@
             Iterator curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + boost::lexer::id_index);
+ std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
             Iterator end_token_ = start_token_;
 
             while (curr_ != end_)
@@ -244,6 +180,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
+ uid_ = *(ptr_ + boost::lexer::unique_id_index);
                     end_token_ = curr_;
                 }
             }
@@ -258,6 +195,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
+ uid_ = *(ptr_ + boost::lexer::unique_id_index);
                     end_token_ = curr_;
                 }
             }
@@ -268,8 +206,10 @@
             }
             else {
                 id_ = boost::lexer::npos;
+ uid_ = boost::lexer::npos;
             }
 
+ unique_id_ = uid_;
             return id_;
         }
     };

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -117,8 +117,7 @@
 
         std::size_t add_state(char_type const* state)
         {
- rules.add_state(state);
- return rules.state(state);
+ return rules.add_state(state);
         }
         string_type initial_state() const
         {
@@ -194,7 +193,7 @@
         // operator_bool() is needed for the safe_bool base class
         operator typename safe_bool<lexer>::result_type() const
         {
- return safe_bool<lexer>()(initialized_dfa);
+ return safe_bool<lexer>()(initialized_dfa_);
         }
 
         typedef typename boost::detail::iterator_traits<Iterator>::value_type
@@ -225,7 +224,7 @@
             if (!init_dfa())
                 return iterator_type();
 
- iterator_data_type iterator_data = { state_machine, rules, actions };
+ iterator_data_type iterator_data = { state_machine_, rules_, actions_ };
             return iterator_type(iterator_data, first, last);
         }
 
@@ -249,10 +248,10 @@
         }
 
         // Lexer instances can be created by means of a derived class only.
- lexer(unsigned int flags_)
- : initialized_dfa(false), flags(map_flags(flags_))
+ lexer(unsigned int flags)
+ : flags_(map_flags(flags)), initialized_dfa_(false)
         {
- rules.flags(flags);
+ rules_.flags(flags_);
         }
 
     public:
@@ -261,15 +260,15 @@
             std::size_t token_id)
         {
             add_state(state);
- rules.add(state, detail::escape(tokendef), token_id, state);
- initialized_dfa = false;
+ rules_.add(state, detail::escape(tokendef), token_id, state);
+ initialized_dfa_ = false;
         }
         void add_token(char_type const* state, string_type const& tokendef,
             std::size_t token_id)
         {
             add_state(state);
- rules.add(state, tokendef, token_id, state);
- initialized_dfa = false;
+ rules_.add(state, tokendef, token_id, state);
+ initialized_dfa_ = false;
         }
 
         // Allow a token_set to be associated with this lexer instance. This
@@ -278,8 +277,8 @@
         void add_token(char_type const* state, token_set const& tokset)
         {
             add_state(state);
- rules.add(state, tokset.get_rules());
- initialized_dfa = false;
+ rules_.add(state, tokset.get_rules());
+ initialized_dfa_ = false;
         }
 
         // Allow to associate a whole lexer instance with another lexer
@@ -287,12 +286,12 @@
         // lexer into this instance.
         template <typename Token_, typename Iterator_, typename Functor_
           , typename TokenSet_>
- void add_token(char_type const* state
+ std::size_t add_token(char_type const* state
           , lexer<Token_, Iterator_, Functor_, TokenSet_> const& lexer_def)
         {
             add_state(state);
- rules.add(state, lexer_def.get_rules());
- initialized_dfa = false;
+ rules_.add(state, lexer_def.get_rules());
+ initialized_dfa_ = false;
         }
 
         // interface for pattern definition management
@@ -300,32 +299,31 @@
             string_type const& patterndef)
         {
             add_state(state);
- rules.add_macro(name.c_str(), patterndef);
- initialized_dfa = false;
+ rules_.add_macro(name.c_str(), patterndef);
+ initialized_dfa_ = false;
         }
 
- boost::lexer::rules const& get_rules() const { return rules; }
+ boost::lexer::rules const& get_rules() const { return rules_; }
 
         void clear(char_type const* state)
         {
- std::size_t s = rules.state(state);
+ std::size_t s = rules_.state(state);
             if (boost::lexer::npos != s)
- rules.clear(state);
- initialized_dfa = false;
+ rules_.clear(state);
+ initialized_dfa_ = false;
         }
         std::size_t add_state(char_type const* state)
         {
- std::size_t stateid = rules.state(state);
+ std::size_t stateid = rules_.state(state);
             if (boost::lexer::npos == stateid) {
- rules.add_state(state);
- stateid = rules.state(state);
- initialized_dfa = false;
+ stateid = rules_.add_state(state);
+ initialized_dfa_ = false;
             }
             return stateid;
         }
         string_type initial_state() const
         {
- return string_type(rules.initial());
+ return string_type(rules_.initial());
         }
 
         // Register a semantic action with the given id
@@ -341,32 +339,43 @@
                 value_type;
             typedef typename Functor::wrap_action_type wrapper_type;
 
- actions.insert(value_type(std::make_pair(id, state)
- , wrapper_type::call(act)));
+ if (actions_.size() <= state)
+ actions_.resize(state + 1);
+
+ std::size_t unique_id = rules_.retrieve_id(state, id);
+ BOOST_ASSERT(boost::lexer::npos != unique_id);
+
+ value_type& actions (actions_[state]);
+ if (actions.size() <= unique_id)
+ actions.resize(unique_id + 1);
+
+ actions[unique_id] = wrapper_type::call(act);
         }
 
         bool init_dfa() const
         {
- if (!initialized_dfa) {
- state_machine.clear();
+ if (!initialized_dfa_) {
+ state_machine_.clear();
                 typedef boost::lexer::basic_generator<char_type> generator;
- generator::build (rules, state_machine);
- generator::minimise (state_machine);
+ generator::build (rules_, state_machine_);
+ generator::minimise (state_machine_);
 
 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
- boost::lexer::debug::dump(state_machine, std::cerr);
+ boost::lexer::debug::dump(state_machine_, std::cerr);
 #endif
- initialized_dfa = true;
+ initialized_dfa_ = true;
             }
             return true;
         }
 
     private:
- mutable boost::lexer::basic_state_machine<char_type> state_machine;
- boost::lexer::basic_rules<char_type> rules;
- typename Functor::semantic_actions_type actions;
- mutable bool initialized_dfa;
- boost::lexer::regex_flags flags;
+ // lexertl specific data
+ mutable boost::lexer::basic_state_machine<char_type> state_machine_;
+ boost::lexer::basic_rules<char_type> rules_;
+ boost::lexer::regex_flags flags_;
+
+ typename Functor::semantic_actions_type actions_;
+ mutable bool initialized_dfa_;
 
         template <typename Lexer>
         friend bool generate_static(Lexer const&, std::ostream&, char const*);
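
The reworked add_action() above resizes the per-state vector on demand and uses rules_.retrieve_id(state, id) to translate the user-visible token id into the rule's unique id before storing the wrapped functor. A small sketch of that registration step; retrieve_id is simulated by a plain map and every name is illustrative:

    #include <cassert>
    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <map>
    #include <utility>
    #include <vector>

    typedef std::function<void(std::size_t, bool&)> action_type;
    typedef std::vector<std::vector<action_type> > semantic_actions_type;

    std::size_t const npos = static_cast<std::size_t>(~0);

    // Stand-in for rules_.retrieve_id(state, id): maps (state, token id) to the
    // unique id that was assigned when the rule was added.
    std::map<std::pair<std::size_t, std::size_t>, std::size_t> rule_positions;

    std::size_t retrieve_id(std::size_t state, std::size_t id)
    {
        std::map<std::pair<std::size_t, std::size_t>, std::size_t>::const_iterator
            it = rule_positions.find(std::make_pair(state, id));
        return it == rule_positions.end() ? npos : it->second;
    }

    void add_action(semantic_actions_type& actions_, std::size_t id,
        std::size_t state, action_type const& act)
    {
        if (actions_.size() <= state)
            actions_.resize(state + 1);              // make room for this state

        std::size_t unique_id = retrieve_id(state, id);
        assert(npos != unique_id);                   // rule must have been added first

        std::vector<action_type>& actions = actions_[state];
        if (actions.size() <= unique_id)
            actions.resize(unique_id + 1);           // make room for this rule

        actions[unique_id] = act;
    }

    int main()
    {
        rule_positions[std::make_pair(0u, 42u)] = 2;  // token 42 is rule #2 in state 0
        semantic_actions_type actions;
        add_action(actions, 42, 0, [](std::size_t id, bool&) {
            std::cout << "handling token " << id << "\n"; });
        bool match = true;
        actions[0][2](42, match);
    }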

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -49,7 +49,7 @@
             char_type;
 
             typedef std::size_t (*next_token_functor)(std::size_t&,
- Iterator const&, Iterator&, Iterator const&);
+ Iterator const&, Iterator&, Iterator const&, std::size_t&);
 
             typedef unused_type semantic_actions_type;
 
@@ -62,15 +62,15 @@
               : next_token(data_.next_), first(first_), last(last_)
             {}
 
- std::size_t next(Iterator& end)
+ std::size_t next(Iterator& end, std::size_t& unique_id)
             {
- typedef basic_iterator_tokeniser<Iterator> tokenizer;
                 std::size_t state;
- return next_token(state, first, end, last);
+ return next_token(state, first, end, last, unique_id);
             }
 
             // nothing to invoke, so this is empty
- bool invoke_actions(std::size_t, Iterator const&)
+ bool invoke_actions(std::size_t, std::size_t, std::size_t
+ , Iterator const&)
             {
                 return true; // always accept
             }
@@ -93,9 +93,8 @@
 
             typedef typename base_type::state_type state_type;
             typedef typename base_type::char_type char_type;
- typedef
- typename base_type::semantic_actions_type
- semantic_actions_type;
+ typedef typename base_type::semantic_actions_type
+ semantic_actions_type;
 
             // initialize the shared data
             template <typename IterData>
@@ -103,17 +102,24 @@
               : base_type(data_, first_, last_), state(0)
             {}
 
- std::size_t next(Iterator& end)
+ std::size_t next(Iterator& end, std::size_t& unique_id)
             {
- typedef basic_iterator_tokeniser<Iterator> tokenizer;
- return this->next_token(state, this->first, end, this->last);
+ return this->next_token(state, this->first, end, this->last
+ , unique_id);
             }
 
             std::size_t& get_state() { return state; }
             void set_state_name (char_type const* new_state)
             {
- std::size_t state_id = this->rules.state(new_state);
+ this->rules.state(new_state);
+ for (std::size_t state_id = 0;
+ state_id < sizeof(lexer_state_names)/sizeof(lexer_state_names[0]); ++state_id)
+
+ // if the following assertion fires you've probably been using
+ // a lexer state name which was not defined in your token
+ // definition
                 BOOST_ASSERT(state_id != boost::lexer::npos);
+
                 if (state_id != boost::lexer::npos)
                     state = state_id;
             }
@@ -135,7 +141,7 @@
 
             typedef void functor_type(iterpair_type, std::size_t, bool&, static_data&);
             typedef boost::function<functor_type> functor_wrapper_type;
- typedef std::multimap<std::size_t, functor_wrapper_type>
+ typedef std::vector<std::vector<functor_wrapper_type> >
                 semantic_actions_type;
 
             typedef detail::wrap_action<functor_wrapper_type
@@ -143,34 +149,31 @@
 
             template <typename IterData>
             static_data (IterData const& data_, Iterator& first_, Iterator const& last_)
- : base_type(data_, first_, last_),
- actions(data_.actions_)
- {}
+ : base_type(data_, first_, last_)
+ , actions(data_.actions_), state_names_(data_.state_names_)
+ , state_count_(data_.state_count_) {}
 
             // invoke attached semantic actions, if defined
- bool invoke_actions(std::size_t id, Iterator const& end)
+ bool invoke_actions(std::size_t state, std::size_t id
+ , std::size_t unique_id, Iterator const& end)
             {
- if (actions.empty())
- return true; // nothing to invoke, continue with 'match'
+ if (state >= actions_.size())
+ return true; // no action defined for this state
 
- iterpair_type itp(this->first, end);
- bool match = true;
+ std::vector<functor_wrapper_type> const& actions = actions_[state];
 
- typedef typename semantic_actions_type::const_iterator
- iterator_type;
+ if (unique_id >= actions.size() || !actions[unique_id])
+ return true; // nothing to invoke, continue with 'match'
 
- std::pair<iterator_type, iterator_type> p = actions.equal_range(id);
- while (p.first != p.second)
- {
- ((*p.first).second)(itp, id, match, *this);
- if (!match)
- return false; // return a 'no-match'
- ++p.first;
- }
- return true; // normal execution
+ iterpair_type itp(this->first, end);
+ bool match = true;
+ actions[unique_id](itp, id, match, *this);
+ return match;
             }
 
- semantic_actions_type const& actions;
+ semantic_actions_type const& actions_;
+ std::size_t const state_count_;
+ const char* const* state_names_;
         };
     }
 
@@ -279,8 +282,9 @@
 #endif
 
             Iterator end = data.first;
- std::size_t id = data.next(end);
-
+ std::size_t unique_id = boost::lexer::npos;
+ std::size_t id = data.next(end, unique_id);
+
             if (boost::lexer::npos == id) { // no match
 #if defined(BOOST_SPIRIT_DEBUG)
                 std::string next;
@@ -318,7 +322,7 @@
             std::size_t state = data.get_state();
 
             // invoke attached semantic actions, if there are any defined
- if (!data.invoke_actions(id, end))
+ if (!data.invoke_actions(state, id, unique_id, end))
             {
                 // one of the semantic actions signaled no-match
                 return result = result_type(0);

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -30,6 +30,7 @@
         // This is a forward declaration for the generated static table of
         // valid state names
         extern char const* const lexer_state_names[];
+ extern std::size_t const lexer_state_count;
 
         // This is the forward declaration of the generated function to be
         // called to get the next token.
@@ -158,6 +159,8 @@
         {
             typename Functor::next_token_functor next_;
             typename Functor::semantic_actions_type const& actions_;
+ std::size_t const state_count_;
+ const char* const* state_names_;
         };
 
     public:
@@ -167,7 +170,8 @@
         template <typename F>
         iterator_type begin(Iterator& first, Iterator const& last, F next) const
         {
- iterator_data_type iterator_data = { next, actions };
+ iterator_data_type iterator_data = { next, actions
+ , static_::lexer_state_count, static_::lexer_state_names };
             return iterator_type(iterator_data, first, last);
         }
 
@@ -178,7 +182,8 @@
         iterator_type begin(Iterator_& first, Iterator_ const& last) const
         {
             iterator_data_type iterator_data =
- { &lex::lexertl::static_::next_token<Iterator_>, actions };
+ { &lex::lexertl::static_::next_token<Iterator_>, actions,
+ static_::lexer_state_count, static_::lexer_state_names };
             return iterator_type(iterator_data, first, last);
         }
 

Modified: trunk/boost/spirit/home/lex/lexer/sequence.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/sequence.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/sequence.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -40,9 +40,16 @@
           : elements(elements) {}
 
         template <typename LexerDef, typename String>
- void collect(LexerDef& lexdef, String const& state)
+ void collect(LexerDef& lexdef, String const& state) const
         {
- detail::sequence_function<LexerDef, String> f (lexdef, state);
+ detail::sequence_collect_function<LexerDef, String> f (lexdef, state);
+ fusion::any(elements, f);
+ }
+
+ template <typename LexerDef>
+ void add_actions(LexerDef& lexdef) const
+ {
+ detail::sequence_add_actions_function<LexerDef> f (lexdef);
             fusion::any(elements, f);
         }
 

Modified: trunk/boost/spirit/home/lex/lexer/string_token_def.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/string_token_def.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/string_token_def.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -69,6 +69,9 @@
             lexdef.add_token (state.c_str(), str_, id_);
         }
 
+ template <typename LexerDef>
+ void add_actions(LexerDef& lexdef) const {}
+
         std::size_t id() const { return id_; }
 
         string_type str_;

Modified: trunk/boost/spirit/home/lex/lexer/token_def.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/token_def.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/token_def.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -168,6 +168,9 @@
                 lexdef.add_token(state.c_str(), get<char_type>(def), token_id);
         }
 
+ template <typename LexerDef>
+ void add_actions(LexerDef& lexdef) const {}
+
     public:
         typedef Char char_type;
         typedef Idtype id_type;
@@ -180,8 +183,7 @@
 
         explicit token_def(char_type def_, Idtype id_ = Idtype())
           : proto_base_type(terminal_type::make(alias()))
- , def(def_)
- , token_id(Idtype() == id_ ? def_ : id_)
+ , def(def_), token_id(Idtype() == id_ ? def_ : id_)
           , token_state(~0) {}
 
         explicit token_def(string_type const& def_, Idtype id_ = Idtype())
@@ -207,6 +209,7 @@
         // general accessors
         Idtype id() const { return token_id; }
         void id(Idtype id) { token_id = id; }
+
         string_type definition() const
         {
             return (0 == def.which())

Modified: trunk/boost/spirit/home/lex/lexer/token_set.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/token_set.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/token_set.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -91,10 +91,10 @@
 
                 // If the following assertion fires you probably forgot to
                 // associate this token set definition with a lexer instance.
- BOOST_ASSERT(~0 != token_state);
+ BOOST_ASSERT(~0 != token_state_);
 
                 token_type &t = *first;
- if (token_is_valid(t) && token_state == t.state()) {
+ if (token_is_valid(t) && token_state_ == t.state()) {
                 // any of the token definitions matched
                     qi::detail::assign_to(t, attr);
                     ++first;
@@ -123,12 +123,15 @@
             // is not possible. Please create a separate token_set instance
             // from the same set of regular expressions for each lexer state it
             // needs to be associated with.
- BOOST_ASSERT(~0 == token_state || state_id == token_state);
+ BOOST_ASSERT(~0 == token_state_ || state_id == token_state_);
 
- token_state = state_id;
+ token_state_ = state_id;
             lexdef.add_token (state.c_str(), *this);
         }
 
+ template <typename LexerDef>
+ void add_actions(LexerDef& lexdef) const {}
+
     private:
         // allow to use the tokset.add("regex1", id1)("regex2", id2);
         // syntax
@@ -176,15 +179,15 @@
                     tokdef.id(token_id);
                 }
 
- def.add_token (def.initial_state().c_str(), tokdef.definition(),
- token_id);
+ def.add_token(def.initial_state().c_str(), tokdef.definition()
+ , token_id);
                 return *this;
             }
 
             template <typename TokenSet_>
             adder const& operator()(token_set<TokenSet_> const& tokset) const
             {
- def.add_token (def.initial_state().c_str(), tokset);
+ def.add_token(def.initial_state().c_str(), tokset);
                 return *this;
             }
 
@@ -209,18 +212,27 @@
         };
         friend struct pattern_adder;
 
+ private:
+ // Helper function to invoke the necessary 2 step compilation process
+ // on token definition expressions
+ template <typename TokenExpr>
+ void compile2pass(TokenExpr const& expr)
+ {
+ expr.collect(*this, base_token_set::initial_state());
+ expr.add_actions(*this);
+ }
+
     public:
         ///////////////////////////////////////////////////////////////////
         template <typename Expr>
         void define(Expr const& expr)
         {
- compile<lex::domain>(expr).collect(
- *this, base_token_set::initial_state());
+ compile2pass(compile<lex::domain>(expr));
         }
 
         token_set()
           : proto_base_type(terminal_type::make(alias()))
- , add(this_()), add_pattern(this_()), token_state(~0) {}
+ , add(this_()), add_pattern(this_()), token_state_(~0) {}
 
         // allow to assign a token definition expression
         template <typename Expr>
@@ -240,10 +252,10 @@
         adder add;
         pattern_adder add_pattern;
 
- std::size_t state() const { return token_state; }
+ std::size_t state() const { return token_state_; }
 
     private:
- mutable std::size_t token_state;
+ mutable std::size_t token_state_;
     };
 
     // allow to assign a token definition expression

Modified: trunk/boost/spirit/home/lex/reference.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/reference.hpp (original)
+++ trunk/boost/spirit/home/lex/reference.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -37,7 +37,13 @@
         template <typename LexerDef, typename String>
         void collect(LexerDef& lexdef, String const& state) const
         {
- return this->ref.get().collect(lexdef, state);
+ this->ref.get().collect(lexdef, state);
+ }
+
+ template <typename LexerDef>
+ void add_actions(LexerDef& lexdef) const
+ {
+ this->ref.get().add_actions(lexdef);
         }
     };
 

Modified: trunk/boost/spirit/home/support/detail/lexer/consts.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/consts.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/consts.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -15,10 +15,10 @@
 namespace lexer
 {
     enum regex_flags {none = 0, icase = 1, dot_not_newline = 2};
- // 0 = end state, 1 = id, 2 = lex state, 3 = bol, 4 = eol,
- // 5 = dead_state_index
- enum {end_state_index, id_index, state_index, bol_index, eol_index,
- dead_state_index, dfa_offset};
+ // 0 = end state, 1 = id, 2 = unique_id, 3 = lex state, 4 = bol, 5 = eol,
+ // 6 = dead_state_index
+ enum {end_state_index, id_index, unique_id_index, state_index, bol_index,
+ eol_index, dead_state_index, dfa_offset};
 
     const std::size_t max_macro_len = 30;
     const std::size_t num_chars = 256;
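
The enum in consts.hpp is the layout key for the DFA rows lexertl emits: each state occupies dfa_alphabet entries, of which the first dfa_offset are the bookkeeping slots named here and the rest are transitions, one per character class. Inserting unique_id_index shifts state_index, bol_index, eol_index and dead_state_index by one, which is why the tokenizers, the generator and the static-table writer all had to change in the same revision. A toy illustration of reading a row under the new layout; the layout follows the enum's comment, the concrete numbers are made up:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Mirrors the enum above: bookkeeping slots at the start of every DFA row.
    enum {end_state_index, id_index, unique_id_index, state_index, bol_index,
        eol_index, dead_state_index, dfa_offset};

    int main()
    {
        // Assume a tiny 3-symbol alphabet: each row holds dfa_offset bookkeeping
        // entries followed by 3 transition entries.
        std::size_t const dfa_alphabet = dfa_offset + 3;
        std::vector<std::size_t> dfa(2 * dfa_alphabet, 0);

        std::size_t* row = &dfa[1 * dfa_alphabet];   // row of state 1
        row[end_state_index] = 1;                    // accepting state
        row[id_index]        = 42;                   // user-visible token id
        row[unique_id_index] = 7;                    // rule position, new in r53178
        row[dfa_offset + 0]  = 1;                    // symbol 0 loops back to state 1

        std::cout << "accepting: "   << row[end_state_index]
                  << ", id: "        << row[id_index]
                  << ", unique id: " << row[unique_id_index] << "\n";
    }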

Modified: trunk/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -29,6 +29,7 @@
 
         bool _end_state;
         std::size_t _id;
+ std::size_t _unique_id;
         std::size_t _state;
         std::size_t _bol_index;
         std::size_t _eol_index;
@@ -37,6 +38,7 @@
         state () :
             _end_state (false),
             _id (0),
+ _unique_id (npos),
             _state (0),
             _bol_index (npos),
             _eol_index (npos)

Modified: trunk/boost/spirit/home/support/detail/lexer/debug.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/debug.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/debug.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -165,6 +165,8 @@
             {
                 end_state (stream_);
                 stream_ << iter_->id;
+ unique_id (stream_);
+ stream_ << iter_->unique_id;
                 dfa (stream_);
                 stream_ << iter_->goto_dfa;
                 stream_ << std::endl;
@@ -281,6 +283,16 @@
         stream_ << L" END STATE, Id = ";
     }
 
+ static void unique_id (std::ostream &stream_)
+ {
+ stream_ << ", Unique Id = ";
+ }
+
+ static void unique_id (std::wostream &stream_)
+ {
+ stream_ << L", Unique Id = ";
+ }
+
     static void any (std::ostream &stream_)
     {
         stream_ << " . -> ";

Modified: trunk/boost/spirit/home/support/detail/lexer/file_input.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/file_input.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/file_input.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -33,6 +33,7 @@
         struct data
         {
             std::size_t id;
+ std::size_t unique_id;
             const CharT *start;
             const CharT *end;
             std::size_t state;
@@ -40,14 +41,16 @@
             // Construct in end() state.
             data () :
                 id (0),
+ unique_id (npos),
                 state (npos)
             {
             }
 
             bool operator == (const data &rhs_) const
             {
- return id == rhs_.id && start == rhs_.start &&
- end == rhs_.end && state == rhs_.state;
+ return id == rhs_.id && unique_id == rhs_.unique_id &&
+ start == rhs_.start && end == rhs_.end &&
+ state == rhs_.state;
             }
         };
 
@@ -105,12 +108,12 @@
                 _data.id = _input->next (&internals_._lookup->front ()->
                     front (), internals_._dfa_alphabet.front (),
                     &internals_._dfa->front ()->front (), _data.start,
- _data.end);
+ _data.end, _data.unique_id);
             }
             else
             {
                 _data.id = _input->next (internals_, _data.state, _data.start,
- _data.end);
+ _data.end, _data.unique_id);
             }
 
             if (_data.id == 0)
@@ -156,6 +159,7 @@
         iterator iter_;
 
         iter_._input = this;
+ // Over-ride default of 0 (EOF)
         iter_._data.id = npos;
         iter_._data.start = 0;
         iter_._data.end = 0;
@@ -200,7 +204,8 @@
     CharT *_end_buffer;
 
     std::size_t next (const detail::internals &internals_,
- std::size_t &start_state_, const CharT * &start_, const CharT * &end_)
+ std::size_t &start_state_, const CharT * &start_, const CharT * &end_,
+ std::size_t &unique_id_)
     {
         _start_token = _end_token;
 
@@ -213,6 +218,7 @@
         const CharT *curr_ = _start_token;
         bool end_state_ = *ptr_ != 0;
         std::size_t id_ = *(ptr_ + id_index);
+ std::size_t uid_ = *(ptr_ + unique_id_index);
         const CharT *end_token_ = curr_;
 
         for (;;)
@@ -255,6 +261,7 @@
             {
                 end_state_ = true;
                 id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
                 start_state_ = *(ptr_ + state_index);
                 end_token_ = curr_;
             }
@@ -263,6 +270,7 @@
         if (_start_token >= _end_buffer)
         {
             // No more tokens...
+ unique_id_ = npos;
             return 0;
         }
 
@@ -276,6 +284,7 @@
             {
                 end_state_ = true;
                 id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
                 start_state_ = *(ptr_ + state_index);
                 end_token_ = curr_;
             }
@@ -293,23 +302,26 @@
             // No match causes char to be skipped
             _end_token = _start_token + 1;
             id_ = npos;
+ uid_ = npos;
         }
 
         start_ = _start_token;
         end_ = _end_token;
+ unique_id_ = uid_;
         return id_;
     }
 
     std::size_t next (const std::size_t * const lookup_,
         const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
- const CharT * &start_, const CharT * &end_)
+ const CharT * &start_, const CharT * &end_, std::size_t &unique_id_)
     {
         _start_token = _end_token;
 
         const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
         const CharT *curr_ = _start_token;
         bool end_state_ = *ptr_ != 0;
- std::size_t id_ = id_ = *(ptr_ + id_index);
+ std::size_t id_ = *(ptr_ + id_index);
+ std::size_t uid_ = *(ptr_ + unique_id_index);
         const CharT *end_token_ = curr_;
 
         for (;;)
@@ -352,6 +364,7 @@
             {
                 end_state_ = true;
                 id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
                 end_token_ = curr_;
             }
         }
@@ -359,6 +372,7 @@
         if (_start_token >= _end_buffer)
         {
             // No more tokens...
+ unique_id_ = npos;
             return 0;
         }
 
@@ -372,6 +386,7 @@
             {
                 end_state_ = true;
                 id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
                 end_token_ = curr_;
             }
         }
@@ -386,10 +401,12 @@
             // No match causes char to be skipped
             _end_token = _start_token + 1;
             id_ = npos;
+ uid_ = npos;
         }
 
         start_ = _start_token;
         end_ = _end_token;
+ unique_id_ = uid_;
         return id_;
     }
 

Modified: trunk/boost/spirit/home/support/detail/lexer/generator.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/generator.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/generator.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -148,6 +148,7 @@
         const typename rules::string_deque_deque &regexes_ =
             rules_.regexes ();
         const typename rules::id_vector_deque &ids_ = rules_.ids ();
+ std::size_t unique_id_ = 0;
         const typename rules::id_vector_deque &states_ = rules_.states ();
         typename rules::string_deque::const_iterator regex_iter_ =
             regexes_[state_].begin ();
@@ -170,9 +171,9 @@
             seen_BOL_assertion_, seen_EOL_assertion_);
 
         detail::node *root_ = parser::parse (regex_.c_str (),
- regex_.c_str () + regex_.size (), *ids_iter_, *states_iter_,
- rules_.flags (), rules_.locale (), node_ptr_vector_, macromap_,
- token_map_, seen_BOL_assertion_, seen_EOL_assertion_);
+ regex_.c_str () + regex_.size (), *ids_iter_, unique_id_++,
+ *states_iter_, rules_.flags (), rules_.locale (), node_ptr_vector_,
+ macromap_, token_map_, seen_BOL_assertion_, seen_EOL_assertion_);
 
         ++regex_iter_;
         ++ids_iter_;
@@ -186,7 +187,7 @@
             const typename rules::string &regex_ = *regex_iter_;
 
             root_ = parser::parse (regex_.c_str (),
- regex_.c_str () + regex_.size (), *ids_iter_,
+ regex_.c_str () + regex_.size (), *ids_iter_, unique_id_++,
                 *states_iter_, rules_.flags (), rules_.locale (),
                 node_ptr_vector_, macromap_, token_map_,
                 seen_BOL_assertion_, seen_EOL_assertion_);
@@ -277,7 +278,7 @@
             const typename rules::string &name_ = iter_->first;
             const typename rules::string &regex_ = iter_->second;
             detail::node *node_ = parser::parse (regex_.c_str (),
- regex_.c_str () + regex_.size (), 0, 0, flags_,
+ regex_.c_str () + regex_.size (), 0, 0, 0, flags_,
                 locale_, node_ptr_vector_, macromap_, token_map_,
                 seen_BOL_assertion_, seen_EOL_assertion_);
             macro_iter_pair map_iter_ = macromap_.
@@ -362,6 +363,7 @@
     {
         bool end_state_ = false;
         std::size_t id_ = 0;
+ std::size_t unique_id_ = npos;
         std::size_t state_ = 0;
         std::size_t hash_ = 0;
 
@@ -375,8 +377,8 @@
             followpos_->begin (), end_ = followpos_->end ();
             iter_ != end_; ++iter_)
         {
- closure_ex (*iter_, end_state_, id_, state_, set_ptr_.get (),
- vector_ptr_.get (), hash_);
+ closure_ex (*iter_, end_state_, id_, unique_id_, state_,
+ set_ptr_.get (), vector_ptr_.get (), hash_);
         }
 
         bool found_ = false;
@@ -413,6 +415,7 @@
             {
                 dfa_[old_size_] |= end_state;
                 dfa_[old_size_ + id_index] = id_;
+ dfa_[old_size_ + unique_id_index] = unique_id_;
                 dfa_[old_size_ + state_index] = state_;
             }
         }
@@ -421,8 +424,8 @@
     }
 
     static void closure_ex (detail::node *node_, bool &end_state_,
- std::size_t &id_, std::size_t &state_, node_set *set_ptr_,
- node_vector *vector_ptr_, std::size_t &hash_)
+ std::size_t &id_, std::size_t &unique_id_, std::size_t &state_,
+ node_set *set_ptr_, node_vector *vector_ptr_, std::size_t &hash_)
     {
         const bool temp_end_state_ = node_->end_state ();
 
@@ -432,6 +435,7 @@
             {
                 end_state_ = true;
                 id_ = node_->id ();
+ unique_id_ = node_->unique_id ();
                 state_ = node_->lexer_state ();
             }
         }
@@ -502,7 +506,7 @@
                     }
                     else
                     {
- iter_ = lhs_->insert (++iter_, 0);
+ iter_ = lhs_->insert (++iter_, (charset*)0);
                         *iter_ = overlap_.release ();
 
                         // VC++ 6 Hack:
@@ -644,7 +648,7 @@
                     }
                     else
                     {
- iter_ = lhs_->insert (++iter_, 0);
+ iter_ = lhs_->insert (++iter_, (equivset*)0);
                         *iter_ = overlap_.release ();
 
                         // VC++ 6 Hack:
@@ -816,6 +820,7 @@
 
                 new_ptr_[end_state_index] = ptr_[end_state_index];
                 new_ptr_[id_index] = ptr_[id_index];
+ new_ptr_[unique_id_index] = ptr_[unique_id_index];
                 new_ptr_[state_index] = ptr_[state_index];
                 new_ptr_[bol_index] = lookup_ptr_[ptr_[bol_index]];
                 new_ptr_[eol_index] = lookup_ptr_[ptr_[eol_index]];
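
The generator change above threads a per-rule counter (unique_id_) through parser::parse, so every regex gets a distinct index even when several rules share the same user token id. A small illustrative sketch of that numbering follows; the rule list and struct are hypothetical, only the counter idea mirrors the diff.

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <vector>

    // Each rule receives a monotonically increasing unique_id, independent
    // of the user supplied token id, so repeated ids stay distinguishable.
    struct parsed_rule
    {
        std::string regex;
        std::size_t id;         // user supplied token id (may repeat)
        std::size_t unique_id;  // position of the rule, always distinct
    };

    int main()
    {
        std::string const regexes[] = { "[a-z]+", "[0-9]+", "[a-z_]+" };
        std::size_t const ids[] = { 65536, 65537, 65536 };  // note the repeat

        std::vector<parsed_rule> rules;
        std::size_t unique_id_ = 0;
        for (std::size_t i = 0; i != 3; ++i)
        {
            parsed_rule r = { regexes[i], ids[i], unique_id_++ };
            rules.push_back(r);
        }

        for (std::size_t i = 0; i != rules.size(); ++i)
            std::cout << rules[i].regex << " id=" << rules[i].id
                      << " unique_id=" << rules[i].unique_id << '\n';
    }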

Modified: trunk/boost/spirit/home/support/detail/lexer/input.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/input.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/input.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -32,6 +32,7 @@
         struct data
         {
             std::size_t id;
+ std::size_t unique_id;
             FwdIter start;
             FwdIter end;
             bool bol;
@@ -40,6 +41,7 @@
             // Construct in end() state.
             data () :
                 id (0),
+ unique_id (npos),
                 bol (false),
                 state (npos)
             {
@@ -47,8 +49,9 @@
 
             bool operator == (const data &rhs_) const
             {
- return id == rhs_.id && start == rhs_.start &&
- end == rhs_.end && bol == rhs_.bol && state == rhs_.state;
+ return id == rhs_.id && unique_id == rhs_.unique_id &&
+ start == rhs_.start && end == rhs_.end &&
+ bol == rhs_.bol && state == rhs_.state;
             }
         };
 
@@ -115,13 +118,14 @@
                         (&internals_._lookup->front ()->front (),
                         internals_._dfa_alphabet.front (),
                         &internals_._dfa->front ()->front (),
- _data.bol, _data.end, _input->_end);
+ _data.bol, _data.end, _input->_end, _data.unique_id);
                 }
                 else
                 {
                     _data.id = next (&internals_._lookup->front ()->front (),
                         internals_._dfa_alphabet.front (), &internals_.
- _dfa->front ()->front (), _data.end, _input->_end);
+ _dfa->front ()->front (), _data.end, _input->_end,
+ _data.unique_id);
                 }
             }
             else
@@ -130,12 +134,12 @@
                     internals_._seen_EOL_assertion)
                 {
                     _data.id = next (internals_, _data.state,
- _data.bol, _data.end, _input->_end);
+ _data.bol, _data.end, _input->_end, _data.unique_id);
                 }
                 else
                 {
                     _data.id = next (internals_, _data.state,
- _data.end, _input->_end);
+ _data.end, _input->_end, _data.unique_id);
                 }
             }
 
@@ -148,9 +152,14 @@
 
         std::size_t next (const detail::internals &internals_,
             std::size_t &start_state_, bool bol_,
- FwdIter &start_token_, const FwdIter &end_)
+ FwdIter &start_token_, const FwdIter &end_,
+ std::size_t &unique_id_)
         {
- if (start_token_ == end_) return 0;
+ if (start_token_ == end_)
+ {
+ unique_id_ = npos;
+ return 0;
+ }
 
         again:
             const std::size_t * lookup_ = &internals_._lookup[start_state_]->
@@ -161,6 +170,7 @@
             FwdIter curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + id_index);
+ std::size_t uid_ = *(ptr_ + unique_id_index);
             bool end_bol_ = bol_;
             FwdIter end_token_ = start_token_;
 
@@ -199,6 +209,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
                     start_state_ = *(ptr_ + state_index);
                     end_bol_ = bol_;
                     end_token_ = curr_;
@@ -215,6 +226,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
                     start_state_ = *(ptr_ + state_index);
                     end_bol_ = bol_;
                     end_token_ = curr_;
@@ -239,16 +251,22 @@
                 _data.bol = *start_token_ == '\n';
                 ++start_token_;
                 id_ = npos;
+ uid_ = npos;
             }
 
+ unique_id_ = uid_;
             return id_;
         }
 
         std::size_t next (const detail::internals &internals_,
             std::size_t &start_state_, FwdIter &start_token_,
- FwdIter const &end_)
+ FwdIter const &end_, std::size_t &unique_id_)
         {
- if (start_token_ == end_) return 0;
+ if (start_token_ == end_)
+ {
+ unique_id_ = npos;
+ return 0;
+ }
 
         again:
             const std::size_t * lookup_ = &internals_._lookup[start_state_]->
@@ -259,6 +277,7 @@
             FwdIter curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + id_index);
+ std::size_t uid_ = *(ptr_ + unique_id_index);
             FwdIter end_token_ = start_token_;
 
             while (curr_ != end_)
@@ -277,6 +296,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
                     start_state_ = *(ptr_ + state_index);
                     end_token_ = curr_;
                 }
@@ -294,21 +314,29 @@
                 // No match causes char to be skipped
                 ++start_token_;
                 id_ = npos;
+ uid_ = npos;
             }
 
+ unique_id_ = uid_;
             return id_;
         }
 
         std::size_t next (const std::size_t * const lookup_,
             const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
- bool bol_, FwdIter &start_token_, FwdIter const &end_)
+ bool bol_, FwdIter &start_token_, FwdIter const &end_,
+ std::size_t &unique_id_)
         {
- if (start_token_ == end_) return 0;
+ if (start_token_ == end_)
+ {
+ unique_id_ = npos;
+ return 0;
+ }
 
             const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
             FwdIter curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + id_index);
+ std::size_t uid_ = *(ptr_ + unique_id_index);
             bool end_bol_ = bol_;
             FwdIter end_token_ = start_token_;
 
@@ -347,6 +375,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
                     end_bol_ = bol_;
                     end_token_ = curr_;
                 }
@@ -362,6 +391,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
                     end_bol_ = bol_;
                     end_token_ = curr_;
                 }
@@ -379,21 +409,29 @@
                 _data.bol = *start_token_ == '\n';
                 ++start_token_;
                 id_ = npos;
+ uid_ = npos;
             }
 
+ unique_id_ = uid_;
             return id_;
         }
 
         std::size_t next (const std::size_t * const lookup_,
             const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
- FwdIter &start_token_, FwdIter const &end_)
+ FwdIter &start_token_, FwdIter const &end_,
+ std::size_t &unique_id_)
         {
- if (start_token_ == end_) return 0;
+ if (start_token_ == end_)
+ {
+ unique_id_ = npos;
+ return 0;
+ }
 
             const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
             FwdIter curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + id_index);
+ std::size_t uid_ = *(ptr_ + unique_id_index);
             FwdIter end_token_ = start_token_;
 
             while (curr_ != end_)
@@ -412,6 +450,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
                     end_token_ = curr_;
                 }
             }
@@ -426,8 +465,10 @@
                 // No match causes char to be skipped
                 ++start_token_;
                 id_ = npos;
+ uid_ = npos;
             }
 
+ unique_id_ = uid_;
             return id_;
         }
     };
@@ -452,6 +493,7 @@
         iterator iter_;
 
         iter_._input = this;
+ // Over-ride default of 0 (EOI)
         iter_._data.id = npos;
         iter_._data.start = _begin;
         iter_._data.end = _begin;
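
The iterator's per-token record in input.hpp now carries a unique_id field that defaults to npos, takes part in equality, and is overridden to npos for begin() just as id is. A toy mirror of those field conventions is sketched below; the iterator range members and the surrounding lexer machinery are omitted, so this is not the real class.

    #include <cstddef>

    std::size_t const npos = static_cast<std::size_t>(~0);

    struct token_data
    {
        std::size_t id;         // user-visible token id, 0 means end of input
        std::size_t unique_id;  // per-rule index, npos when unknown/no match
        bool bol;
        std::size_t state;

        // Construct in the end() state, as the diff does.
        token_data() : id(0), unique_id(npos), bol(false), state(npos) {}

        bool operator==(token_data const &rhs) const
        {
            return id == rhs.id && unique_id == rhs.unique_id
                && bol == rhs.bol && state == rhs.state;
        }
    };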

Modified: trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -51,10 +51,10 @@
 <DUPLICATE> -> '?' | '*' | '+' | '{n[,[m]]}'
 */
     static node *parse (const CharT *start_, const CharT * const end_,
- const std::size_t id_, const std::size_t dfa_state_,
- const regex_flags flags_, const std::locale &locale_,
- node_ptr_vector &node_ptr_vector_, const macro_map &macromap_,
- typename tokeniser::token_map &map_,
+ const std::size_t id_, const std::size_t unique_id_,
+ const std::size_t dfa_state_, const regex_flags flags_,
+ const std::locale &locale_, node_ptr_vector &node_ptr_vector_,
+ const macro_map &macromap_, typename tokeniser::token_map &map_,
         bool &seen_BOL_assertion_, bool &seen_EOL_assertion_)
     {
         node *root_ = 0;
@@ -116,7 +116,7 @@
         {
             node_ptr_vector_->push_back (0);
 
- node *rhs_node_ = new end_node (id_, dfa_state_);
+ node *rhs_node_ = new end_node (id_, unique_id_, dfa_state_);
 
             node_ptr_vector_->back () = rhs_node_;
             node_ptr_vector_->push_back (0);

Modified: trunk/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -18,9 +18,11 @@
 class end_node : public node
 {
 public:
- end_node (const std::size_t id_, const std::size_t lexer_state_) :
+ end_node (const std::size_t id_, const std::size_t unique_id_,
+ const std::size_t lexer_state_) :
         node (false),
         _id (id_),
+ _unique_id (unique_id_),
         _lexer_state (lexer_state_)
     {
         node::_firstpos.push_back (this);
@@ -58,6 +60,11 @@
         return _id;
     }
 
+ virtual std::size_t unique_id () const
+ {
+ return _unique_id;
+ }
+
     virtual std::size_t lexer_state () const
     {
         return _lexer_state;
@@ -65,6 +72,7 @@
 
 private:
     std::size_t _id;
+ std::size_t _unique_id;
     std::size_t _lexer_state;
     node_vector _followpos;
 

Modified: trunk/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -132,6 +132,11 @@
         throw runtime_error ("Internal error node::id()");
     }
 
+ virtual std::size_t unique_id () const
+ {
+ throw runtime_error ("Internal error node::unique_id()");
+ }
+
     virtual std::size_t lexer_state () const
     {
         throw runtime_error ("Internal error node::state()");
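
The two tree changes above add unique_id() as a virtual on the node base (which reports misuse) and override it only in end_node, which is the only node kind that stores one. A pared-down sketch of that dispatch pattern, keeping the class and member names from the diff but dropping everything else:

    #include <cstddef>
    #include <iostream>
    #include <stdexcept>

    class node
    {
    public:
        virtual ~node() {}

        // Only end nodes carry a unique id; anything else is an internal error.
        virtual std::size_t unique_id() const
        {
            throw std::runtime_error("Internal error node::unique_id()");
        }
    };

    class end_node : public node
    {
    public:
        end_node(std::size_t id, std::size_t unique_id, std::size_t lexer_state)
          : _id(id), _unique_id(unique_id), _lexer_state(lexer_state) {}

        virtual std::size_t unique_id() const { return _unique_id; }

    private:
        std::size_t _id;
        std::size_t _unique_id;
        std::size_t _lexer_state;
    };

    int main()
    {
        end_node n(65536, 3, 0);
        node const &base = n;
        std::cout << base.unique_id() << '\n';   // prints 3 via virtual dispatch
    }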

Modified: trunk/boost/spirit/home/support/detail/lexer/rules.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/rules.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/rules.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -44,6 +44,27 @@
             return L"INITIAL";
         }
     };
+
+ template <typename CharT>
+ struct dot;
+
+ template<>
+ struct dot<char>
+ {
+ static const char *str ()
+ {
+ return ".";
+ }
+ };
+
+ template<>
+ struct dot<wchar_t>
+ {
+ static const wchar_t *str()
+ {
+ return L".";
+ }
+ };
 }
 
 template<typename CharT>
@@ -60,6 +81,8 @@
     typedef std::deque<string_pair> string_pair_deque;
     typedef std::map<string, std::size_t> string_size_t_map;
     typedef std::pair<string, std::size_t> string_size_t_pair;
+ typedef std::pair<std::size_t, std::size_t> unique_id_key;
+ typedef std::map<unique_id_key, std::size_t> unique_id_map;
 
     basic_rules (const regex_flags flags_ = dot_not_newline) :
         _flags (flags_)
@@ -141,7 +164,7 @@
         }
     }
 
- void add_state (const CharT *name_)
+ std::size_t add_state (const CharT *name_)
     {
         validate (name_);
 
@@ -157,6 +180,9 @@
                 _lexer_state_names.push_back (name_);
             }
         }
+
+ // INITIAL is not stored, so there is no need to subtract 1.
+ return _lexer_state_names.size();
     }
 
     void add_macro (const CharT *name_, const CharT *regex_)
@@ -198,62 +224,68 @@
         }
     }
 
- void add (const CharT *regex_, const std::size_t id_)
+ std::size_t add (const CharT *regex_, const std::size_t id_)
     {
- add (string (regex_), id_);
+ return add (string (regex_), id_);
     }
 
- void add (const CharT *regex_start_, const CharT *regex_end_,
+ std::size_t add (const CharT *regex_start_, const CharT *regex_end_,
         const std::size_t id_)
     {
- add (string (regex_start_, regex_end_), id_);
+ return add (string (regex_start_, regex_end_), id_);
     }
 
- void add (const string &regex_, const std::size_t id_)
+ std::size_t add (const string &regex_, const std::size_t id_)
     {
         check_for_invalid_id (id_);
         _regexes[0].push_back (regex_);
         _ids[0].push_back (id_);
         _states[0].push_back (0);
+ return _regexes[0].size () - 1;
     }
 
     void add (const CharT *curr_state_, const CharT *regex_,
- const CharT *new_state_)
+ const CharT *new_state_, id_vector *id_vec_ = 0)
     {
- add (curr_state_, string (regex_), new_state_);
+ add (curr_state_, string (regex_), new_state_, id_vec_);
     }
 
     void add (const CharT *curr_state_, const CharT *regex_start_,
- const CharT *regex_end_, const CharT *new_state_)
+ const CharT *regex_end_, const CharT *new_state_,
+ id_vector *id_vec_ = 0)
     {
- add (curr_state_, string (regex_start_, regex_end_), new_state_);
+ add (curr_state_, string (regex_start_, regex_end_),
+ new_state_, id_vec_);
     }
 
     void add (const CharT *curr_state_, const string &regex_,
- const CharT *new_state_)
+ const CharT *new_state_, id_vector *id_vec_ = 0)
     {
- add (curr_state_, regex_, 0, new_state_, false);
+ add (curr_state_, regex_, 0, new_state_, false, id_vec_);
     }
 
     void add (const CharT *curr_state_, const CharT *regex_,
- const std::size_t id_, const CharT *new_state_)
+ const std::size_t id_, const CharT *new_state_, id_vector *id_vec_ = 0)
     {
- add (curr_state_, string (regex_), id_, new_state_);
+ add (curr_state_, string (regex_), id_, new_state_, id_vec_);
     }
 
     void add (const CharT *curr_state_, const CharT *regex_start_,
- const CharT *regex_end_, const std::size_t id_, const CharT *new_state_)
+ const CharT *regex_end_, const std::size_t id_,
+ const CharT *new_state_, id_vector *id_vec_ = 0)
     {
- add (curr_state_, string (regex_start_, regex_end_), id_, new_state_);
+ add (curr_state_, string (regex_start_, regex_end_), id_,
+ new_state_, id_vec_);
     }
 
     void add (const CharT *curr_state_, const string &regex_,
- const std::size_t id_, const CharT *new_state_)
+ const std::size_t id_, const CharT *new_state_, id_vector *id_vec_ = 0)
     {
- add (curr_state_, regex_, id_, new_state_, true);
+ add (curr_state_, regex_, id_, new_state_, true, id_vec_);
     }
 
- void add (const CharT *curr_state_, const basic_rules &rules_)
+ void add (const CharT *curr_state_, const basic_rules &rules_,
+ id_vector *id_vec_ = 0)
     {
         const string_deque_deque &regexes_ = rules_.regexes ();
         const id_vector_deque &ids_ = rules_.ids ();
@@ -266,6 +298,7 @@
         typename string_deque::const_iterator regex_iter_;
         typename string_deque::const_iterator regex_end_;
         typename id_vector::const_iterator id_iter_;
+ id_vector *temp_id_vec_ = id_vec_;
 
         for (; state_regex_iter_ != state_regex_end_; ++state_regex_iter_)
         {
@@ -275,7 +308,16 @@
 
             for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_)
             {
- add (curr_state_, *regex_iter_, *id_iter_, curr_state_);
+ add (curr_state_, *regex_iter_, *id_iter_, detail::dot<CharT>::str(),
+ temp_id_vec_);
+
+ if (temp_id_vec_)
+ {
+ // As suggested by Hartmut, only fill the id_vec_ once.
+ // The dfa sizes can be examined at the end to get a range
+ // of ids.
+ temp_id_vec_ = 0;
+ }
             }
         }
     }
@@ -328,6 +370,19 @@
         return detail::initial<CharT>::str ();
     }
 
+ std::size_t retrieve_id (std::size_t state, std::size_t id) const
+ {
+ unique_id_key key (state, id);
+ typename unique_id_map::const_iterator it = _unique_ids.find (key);
+
+ if (it == _unique_ids.end ())
+ {
+ return npos;
+ }
+
+ return (*it).second;
+ }
+
 private:
     string_size_t_map _statemap;
     string_pair_deque _macrodeque;
@@ -338,13 +393,20 @@
     regex_flags _flags;
     std::locale _locale;
     string_deque _lexer_state_names;
+ unique_id_map _unique_ids;
 
     void add (const CharT *curr_state_, const string &regex_,
- const std::size_t id_, const CharT *new_state_, const bool check_)
+ const std::size_t id_, const CharT *new_state_, const bool check_,
+ id_vector *id_vec_ = 0)
     {
         const bool star_ = *curr_state_ == '*' && *(curr_state_ + 1) == 0;
         const bool dot_ = *new_state_ == '.' && *(new_state_ + 1) == 0;
 
+ if (id_vec_)
+ {
+ id_vec_->clear();
+ }
+
         if (check_)
         {
             check_for_invalid_id (id_);
@@ -443,6 +505,13 @@
             _regexes[curr_].push_back (regex_);
             _ids[curr_].push_back (id_);
             _states[curr_].push_back (dot_ ? curr_ : new_);
+
+ if (id_vec_)
+ {
+ id_vec_->push_back (_regexes[curr_].size () - 1);
+ }
+
+ map_id (dot_ ? curr_ : new_, id_, _regexes[curr_].size () - 1);
         }
     }
 
@@ -528,6 +597,22 @@
             break;
         }
     }
+
+ bool map_id (std::size_t state, std::size_t id, std::size_t unique_id)
+ {
+ typedef typename unique_id_map::iterator iterator_type;
+
+ unique_id_key key (state, id);
+ iterator_type it = _unique_ids.find (key);
+ if (it != _unique_ids.end ())
+ {
+ (*it).second = unique_id;
+ return false;
+ }
+
+ typedef typename unique_id_map::value_type value_type;
+ return _unique_ids.insert (value_type (key, unique_id)).second;
+ }
 };
 
 typedef basic_rules<char> rules;
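
The rules.hpp changes above add (state, id) to unique-id bookkeeping: map_id() records the unique id of the most recently added rule for a (lexer state, token id) pair and retrieve_id() looks it up, returning npos when the pair is unknown. The following standalone sketch reproduces just that bookkeeping with free functions over a std::map; it is a simplification for illustration, not the basic_rules members themselves.

    #include <cstddef>
    #include <iostream>
    #include <map>
    #include <utility>

    std::size_t const npos = static_cast<std::size_t>(~0);

    typedef std::pair<std::size_t, std::size_t> unique_id_key;
    typedef std::map<unique_id_key, std::size_t> unique_id_map;

    bool map_id(unique_id_map &m, std::size_t state, std::size_t id,
                std::size_t unique_id)
    {
        unique_id_key key(state, id);
        unique_id_map::iterator it = m.find(key);
        if (it != m.end())
        {
            it->second = unique_id;   // later rules overwrite earlier entries
            return false;
        }
        return m.insert(unique_id_map::value_type(key, unique_id)).second;
    }

    std::size_t retrieve_id(unique_id_map const &m, std::size_t state,
                            std::size_t id)
    {
        unique_id_map::const_iterator it = m.find(unique_id_key(state, id));
        return it == m.end() ? npos : it->second;
    }

    int main()
    {
        unique_id_map ids;
        map_id(ids, 0, 65536, 0);   // token id 65536 in state 0 -> rule #0
        map_id(ids, 0, 10, 1);      // token id 10 in state 0    -> rule #1
        std::cout << retrieve_id(ids, 0, 10) << ' '
                  << retrieve_id(ids, 1, 10) << '\n';  // prints 1, then npos
    }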

Modified: trunk/boost/spirit/home/support/detail/lexer/state_machine.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/state_machine.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/state_machine.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -45,6 +45,7 @@
             // Current state info
             bool end_state;
             std::size_t id;
+ std::size_t unique_id;
             std::size_t goto_dfa;
             std::size_t bol_index;
             std::size_t eol_index;
@@ -61,6 +62,7 @@
                 transition (npos),
                 end_state (false),
                 id (npos),
+ unique_id (npos),
                 goto_dfa (npos),
                 bol_index (npos),
                 eol_index (npos),
@@ -77,6 +79,7 @@
                     transition == rhs_.transition &&
                     end_state == rhs_.end_state &&
                     id == rhs_.id &&
+ unique_id == rhs_.unique_id &&
                     goto_dfa == rhs_.goto_dfa &&
                     bol_index == rhs_.bol_index &&
                     eol_index == rhs_.eol_index &&
@@ -197,6 +200,7 @@
                 _transitions = _data.transitions = ptr_->_transitions.size ();
                 _data.end_state = ptr_->_end_state;
                 _data.id = ptr_->_id;
+ _data.unique_id = ptr_->_unique_id;
                 _data.goto_dfa = ptr_->_state;
                 _data.bol_index = ptr_->_bol_index;
                 _data.eol_index = ptr_->_eol_index;
@@ -281,6 +285,7 @@
             iter_._transition = 0;
             iter_._data.end_state = ptr_->front ()._end_state;
             iter_._data.id = ptr_->front ()._id;
+ iter_._data.unique_id = ptr_->front()._unique_id;
             iter_._data.goto_dfa = ptr_->front ()._state;
             iter_._data.bol_index = ptr_->front ()._bol_index;
             iter_._data.eol_index = ptr_->front ()._eol_index;
@@ -368,6 +373,7 @@
 
                 state_->_end_state = *read_ptr_ != 0;
                 state_->_id = *(read_ptr_ + id_index);
+ state_->_unique_id = *(read_ptr_ + unique_id_index);
                 state_->_state = *(read_ptr_ + state_index);
                 state_->_bol_index = *(read_ptr_ + bol_index) - 1;
                 state_->_eol_index = *(read_ptr_ + eol_index) - 1;

Modified: trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp
==============================================================================
--- trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp (original)
+++ trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -6,8 +6,8 @@
 
 // Auto-generated by boost::lexer, do not edit
 
-#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_MAY_14_2009_13_47_08)
-#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_MAY_14_2009_13_47_08
+#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_MAY_22_2009_09_41_02)
+#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_MAY_22_2009_09_41_02
 
 #include <boost/detail/iterator.hpp>
 #include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
@@ -22,52 +22,58 @@
     "INITIAL"
 };
 
+// this variable defines the number of lexer states
+std::size_t const lexer_state_count = 1;
+
 template<typename Iterator>
 std::size_t next_token (std::size_t &start_state_, Iterator const& start_,
- Iterator &start_token_, Iterator const& end_)
+ Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)
 {
- enum {end_state_index, id_index, state_index, bol_index, eol_index,
- dead_state_index, dfa_offset};
+ enum {end_state_index, id_index, unique_id_index, state_index, bol_index,
+ eol_index, dead_state_index, dfa_offset};
     static const std::size_t npos = static_cast<std::size_t>(~0);
- static const std::size_t lookup_[256] = {7, 7, 7, 7, 7, 7, 7, 7,
- 7, 8, 6, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 8, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7};
- static const std::size_t dfa_alphabet_ = 9;
- static const std::size_t dfa_[45] = {0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 3,
- 2, 4, 1, 65536, 0, 0, 0, 0,
- 0, 2, 0, 1, 10, 0, 0, 0,
- 0, 0, 0, 0, 1, 65537, 0, 0,
- 0, 0, 0, 0, 0};
+ static const std::size_t lookup_[256] = {
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 9, 7, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 9, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8 };
+ static const std::size_t dfa_alphabet_ = 10;
+ static const std::size_t dfa_[50] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 3, 2, 4, 1, 65536, 0, 0,
+ 0, 0, 0, 0, 2, 0, 1, 10,
+ 1, 0, 0, 0, 0, 0, 0, 0,
+ 1, 65537, 2, 0, 0, 0, 0, 0,
+ 0, 0 };
 
     if (start_token_ == end_) return 0;
 
@@ -75,13 +81,13 @@
     Iterator curr_ = start_token_;
     bool end_state_ = *ptr_ != 0;
     std::size_t id_ = *(ptr_ + id_index);
+ std::size_t uid_ = *(ptr_ + unique_id_index);
     Iterator end_token_ = start_token_;
 
     while (curr_ != end_)
     {
         std::size_t const state_ =
- ptr_[lookup_[static_cast<unsigned char>
- (*curr_++)]];
+ ptr_[lookup_[static_cast<unsigned char>(*curr_++)]];
 
         if (state_ == 0) break;
 
@@ -91,6 +97,7 @@
         {
             end_state_ = true;
             id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
             end_token_ = curr_;
         }
     }
@@ -103,8 +110,10 @@
     else
     {
         id_ = npos;
+ uid_ = npos;
     }
 
+ unique_id_ = uid_;
     return id_;
 }
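
In the regenerated static table above, every DFA row gains a unique_id_index column between id_index and state_index, which is why dfa_alphabet_ grows from 9 to 10 and the lookup_ character classes shift by one. The short sketch below reads the new field from one accepting row copied verbatim from dfa_ in word_count_static.hpp (the row for the "word" token); the surrounding program is illustrative only.

    #include <cstddef>
    #include <iostream>

    // Row layout of the regenerated table; names follow the diff above.
    enum {end_state_index, id_index, unique_id_index, state_index, bol_index,
        eol_index, dead_state_index, dfa_offset};

    int main()
    {
        // Accepting "word" state from dfa_ (indices 20..29 of the table).
        static const std::size_t word_state_[10] =
            {1, 65536, 0, 0, 0, 0, 0, 0, 2, 0};

        std::cout << "end state: " << word_state_[end_state_index] << '\n'   // 1
                  << "token id : " << word_state_[id_index] << '\n'          // 65536
                  << "unique id: " << word_state_[unique_id_index] << '\n';  // 0
    }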
 

