Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r53234 - in trunk: boost/spirit/home/lex boost/spirit/home/lex/lexer boost/spirit/home/lex/lexer/lexertl boost/spirit/home/support/detail/lexer boost/spirit/home/support/detail/lexer/parser libs/spirit/example/lex/static_lexer
From: hartmut.kaiser_at_[hidden]
Date: 2009-05-24 20:31:55


Author: hkaiser
Date: 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
New Revision: 53234
URL: http://svn.boost.org/trac/boost/changeset/53234

Log:
Spirit: updated lexertl, fixed static lexing with states and semantic actions
Added:
   trunk/boost/spirit/home/lex/lexer/lexertl/unique_id.hpp (contents, props changed)
   trunk/libs/spirit/example/lex/static_lexer/word_count_lexer_generate.cpp (contents, props changed)
   trunk/libs/spirit/example/lex/static_lexer/word_count_lexer_static.cpp (contents, props changed)
   trunk/libs/spirit/example/lex/static_lexer/word_count_lexer_static.hpp (contents, props changed)
   trunk/libs/spirit/example/lex/static_lexer/word_count_lexer_tokens.hpp (contents, props changed)
Text files modified:
   trunk/boost/spirit/home/lex/lexer/action.hpp | 2
   trunk/boost/spirit/home/lex/lexer/char_token_def.hpp | 4
   trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp | 4
   trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp | 49 +++++++++--
   trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp | 84 ++++++++++---------
   trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp | 28 +++---
   trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp | 152 +++++++++++++++++++++++------------
   trunk/boost/spirit/home/lex/lexer/string_token_def.hpp | 4
   trunk/boost/spirit/home/lex/lexer/token_def.hpp | 66 ++++++++------
   trunk/boost/spirit/home/lex/reference.hpp | 4
   trunk/boost/spirit/home/support/detail/lexer/generator.hpp | 16 ++-
   trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp | 22 +---
   trunk/boost/spirit/home/support/detail/lexer/rules.hpp | 168 ++++++++++++++++++++-------------------
   trunk/libs/spirit/example/lex/static_lexer/Jamfile | 3
   trunk/libs/spirit/example/lex/static_lexer/word_count_generate.cpp | 9 +
   trunk/libs/spirit/example/lex/static_lexer/word_count_static.cpp | 9 +
   trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp | 33 ++++++-
   17 files changed, 396 insertions(+), 261 deletions(-)

Modified: trunk/boost/spirit/home/lex/lexer/action.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/action.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/action.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -42,7 +42,7 @@
 
             // retrieve the id of the associated token_def and register the
             // given semantic action with the lexer instance
- lexdef.add_action(subject.id(), subject.state(), f);
+ lexdef.add_action(subject.unique_id(), subject.state(), f);
         }
 
         Subject subject;

Modified: trunk/boost/spirit/home/lex/lexer/char_token_def.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/char_token_def.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/char_token_def.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -66,13 +66,15 @@
         template <typename LexerDef, typename String>
         void collect(LexerDef& lexdef, String const& state) const
         {
- lexdef.add_token (state.c_str(), ch, static_cast<std::size_t>(ch));
+ unique_id_ = lexdef.add_token (state.c_str(), ch
+ , static_cast<std::size_t>(ch));
         }
 
         template <typename LexerDef>
         void add_actions(LexerDef& lexdef) const {}
 
         std::size_t id() const { return static_cast<std::size_t>(ch); }
+ std::size_t unique_id() const { return unique_id_; }
 
         char_type ch;
         mutable std::size_t unique_id_;

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -284,7 +284,7 @@
                 for (std::size_t i = 0; i < 10 && it != data.last; ++it, ++i)
                     next += *it;
 
- std::cerr << "Not matched, in state: " << data.state
+ std::cerr << "Not matched, in state: " << data.get_state()
                           << ", lookahead: >" << next << "<" << std::endl;
 #endif
                 return result = result_type(0);
@@ -305,7 +305,7 @@
                     next += *it;
 
                 std::cerr << "Matched: " << id << ", in state: "
- << data.state << ", string: >"
+ << data.get_state() << ", string: >"
                           << std::basic_string<char_type>(data.first, end) << "<"
                           << ", lookahead: >" << next << "<" << std::endl;
             }

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -29,7 +29,7 @@
     // names, but we need it sorted using the state ids.
     template <typename Char>
     inline bool
- generate_cpp_state_names (boost::lexer::basic_rules<Char> const& rules_
+ generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_
       , std::ostream &os_, char const* name_suffix = "")
     {
         // we need to re-sort the state names in ascending order of the state
@@ -48,7 +48,8 @@
         }
 
         os_ << "// this table defines the names of the lexer states\n";
- os_ << "char const* const lexer_state_names" << name_suffix
+ os_ << "char const* const lexer_state_names"
+ << (name_suffix[0] ? "_" : "") << name_suffix
             << "[" << rules_.statemap().size() << "] = \n{\n";
 
         typedef typename reverse_state_map_type::iterator iterator;
@@ -69,10 +70,31 @@
         os_ << "};\n\n";
 
         os_ << "// this variable defines the number of lexer states\n";
- os_ << "std::size_t const lexer_state_count = "
- << rules_.statemap().size() << ";\n\n";
+ os_ << "std::size_t const lexer_state_count"
+ << (name_suffix[0] ? "_" : "") << name_suffix
+ << " = " << rules_.statemap().size() << ";\n\n";
+ return os_.good();
+ }
 
- return true;
+ inline bool
+ generate_cpp_state_table (std::ostream &os_, char const* name_suffix = "")
+ {
+ os_ << "// this defines a generic accessor for the information above\n";
+ os_ << "struct lexer"
+ << (name_suffix[0] ? "_" : "") << name_suffix << "\n{\n";
+ os_ << " static std::size_t const state_count()\n";
+ os_ << " {\n return lexer_state_count"
+ << (name_suffix[0] ? "_" : "") << name_suffix <<"; \n }\n\n";
+ os_ << " static char const* const state_name(std::size_t idx)\n";
+ os_ << " {\n return lexer_state_names"
+ << (name_suffix[0] ? "_" : "") << name_suffix <<"[idx]; \n }\n\n";
+ os_ << " template<typename Iterator>\n";
+ os_ << " static std::size_t next(std::size_t &start_state_, Iterator const& start_\n";
+ os_ << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n";
+ os_ << " {\n return next_token"
+ << (name_suffix[0] ? "_" : "") << name_suffix
+ << "(start_state_, start_, start_token_, end_, unique_id_); \n }\n};\n\n";
+ return os_.good();
     }
 
     ///////////////////////////////////////////////////////////////////////////
@@ -99,7 +121,9 @@
             "http://www.boost.org/LICENSE_1_0.txt)\n\n";
         os_ << "// Auto-generated by boost::lexer, do not edit\n\n";
 
- std::string guard(__DATE__ "_" __TIME__);
+ std::string guard(name_suffix);
+ guard += name_suffix[0] ? "_" : "";
+ guard += __DATE__ "_" __TIME__;
         std::string::size_type p = guard.find_first_of(": ");
         while (std::string::npos != p)
         {
@@ -119,12 +143,14 @@
         os_ << "namespace boost { namespace spirit { namespace lex { "
             "namespace lexertl { namespace static_ {\n\n";
 
- // generate the table containing state names
- if (!generate_cpp_state_names(rules_, os_, name_suffix))
+ // generate the lexer state information variables
+ if (!generate_cpp_state_info(rules_, os_, name_suffix))
             return false;
 
+ os_ << "// this function returns the next matched token\n";
         os_ << "template<typename Iterator>\n";
- os_ << "std::size_t next_token" << name_suffix << " (";
+ os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "")
+ << name_suffix << " (";
 
         if (dfas_ > 1 || !optimize_parameters)
         {
@@ -146,7 +172,7 @@
         os_ << "{\n";
         os_ << " enum {end_state_index, id_index, unique_id_index, "
           "state_index, bol_index,\n";
- os_ << " eol_index, dead_state_index, dfa_offset};\n";
+ os_ << " eol_index, dead_state_index, dfa_offset};\n\n";
         os_ << " static const std::size_t npos = static_cast"
           "<std::size_t>(~0);\n";
 
@@ -537,6 +563,9 @@
         os_ << " return id_;\n";
         os_ << "}\n\n";
 
+ if (!generate_cpp_state_table(os_, name_suffix))
+ return false;
+
         os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n";
 
         os_ << "#endif\n";

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -21,6 +21,7 @@
 #include <boost/spirit/home/lex/lexer/lexertl/token.hpp>
 #include <boost/spirit/home/lex/lexer/lexertl/functor.hpp>
 #include <boost/spirit/home/lex/lexer/lexertl/iterator.hpp>
+#include <boost/spirit/home/lex/lexer/lexertl/unique_id.hpp>
 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
 #include <boost/spirit/home/support/detail/lexer/debug.hpp>
 #endif
@@ -73,6 +74,20 @@
             }
             return result;
         }
+
+ ///////////////////////////////////////////////////////////////////////
+ //
+ ///////////////////////////////////////////////////////////////////////
+ inline boost::lexer::regex_flags map_flags(unsigned int flags)
+ {
+ unsigned int retval = boost::lexer::none;
+ if (flags & match_flags::match_not_dot_newline)
+ retval |= boost::lexer::dot_not_newline;
+ if (flags & match_flags::match_icase)
+ retval |= boost::lexer::icase;
+
+ return boost::lexer::regex_flags(retval);
+ }
     }
 
     ///////////////////////////////////////////////////////////////////////////
@@ -90,42 +105,46 @@
         typedef Token token_type;
         typedef typename Token::id_type id_type;
 
+ token_set(unsigned int flags = 0)
+ : rules_(detail::map_flags(flags), &unique_id<id_type>::get)
+ {}
+
         // interface for token definition management
- void add_token (char_type const* state, char_type tokendef
+ std::size_t add_token (char_type const* state, char_type tokendef
           , std::size_t token_id)
         {
- rules.add(state, detail::escape(tokendef), token_id, state);
+ return rules_.add(state, detail::escape(tokendef), token_id, state);
         }
 
- void add_token (char_type const* state, string_type const& tokendef
+ std::size_t add_token (char_type const* state, string_type const& tokendef
           , std::size_t token_id)
         {
- rules.add(state, tokendef, token_id, state);
+ return rules_.add(state, tokendef, token_id, state);
         }
 
         // interface for pattern definition management
- void add_pattern (char_type const* state, string_type const& name
+ std::size_t add_pattern (char_type const* state, string_type const& name
           , string_type const& patterndef)
         {
             add_state(state);
- rules.add_macro(name.c_str(), patterndef);
+ return rules_.add_macro(name.c_str(), patterndef);
         }
 
- boost::lexer::rules const& get_rules() const { return rules; }
+ boost::lexer::rules const& get_rules() const { return rules_; }
 
- void clear() { rules.clear(); }
+ void clear() { rules_.clear(); }
 
         std::size_t add_state(char_type const* state)
         {
- return rules.add_state(state);
+ return rules_.add_state(state);
         }
         string_type initial_state() const
         {
- return string_type(rules.initial());
+ return string_type(rules_.initial());
         }
 
     private:
- boost::lexer::basic_rules<char_type> rules;
+ boost::lexer::basic_rules<char_type> rules_;
     };
 
     ///////////////////////////////////////////////////////////////////////////
@@ -236,49 +255,38 @@
         }
 
     protected:
- static boost::lexer::regex_flags map_flags(unsigned int flags)
- {
- unsigned int retval = boost::lexer::none;
- if (flags & match_flags::match_not_dot_newline)
- retval |= boost::lexer::dot_not_newline;
- if (flags & match_flags::match_icase)
- retval |= boost::lexer::icase;
-
- return boost::lexer::regex_flags(retval);
- }
-
         // Lexer instances can be created by means of a derived class only.
         lexer(unsigned int flags)
- : flags_(map_flags(flags)), initialized_dfa_(false)
- {
- rules_.flags(flags_);
- }
+ : flags_(detail::map_flags(flags))
+ , rules_(flags_, &unique_id<id_type>::get)
+ , initialized_dfa_(false)
+ {}
 
     public:
         // interface for token definition management
- void add_token(char_type const* state, char_type tokendef,
+ std::size_t add_token(char_type const* state, char_type tokendef,
             std::size_t token_id)
         {
             add_state(state);
- rules_.add(state, detail::escape(tokendef), token_id, state);
             initialized_dfa_ = false;
+ return rules_.add(state, detail::escape(tokendef), token_id, state);
         }
- void add_token(char_type const* state, string_type const& tokendef,
+ std::size_t add_token(char_type const* state, string_type const& tokendef,
             std::size_t token_id)
         {
             add_state(state);
- rules_.add(state, tokendef, token_id, state);
             initialized_dfa_ = false;
+ return rules_.add(state, tokendef, token_id, state);
         }
 
         // Allow a token_set to be associated with this lexer instance. This
         // copies all token definitions of the right hand side into this lexer
         // instance.
- void add_token(char_type const* state, token_set const& tokset)
+ std::size_t add_token(char_type const* state, token_set const& tokset)
         {
             add_state(state);
- rules_.add(state, tokset.get_rules());
             initialized_dfa_ = false;
+ return rules_.add(state, tokset.get_rules());
         }
 
         // Allow to associate a whole lexer instance with another lexer
@@ -286,12 +294,12 @@
         // lexer into this instance.
         template <typename Token_, typename Iterator_, typename Functor_
           , typename TokenSet_>
- void add_token(char_type const* state
+ std::size_t add_token(char_type const* state
           , lexer<Token_, Iterator_, Functor_, TokenSet_> const& lexer_def)
         {
             add_state(state);
- rules_.add(state, lexer_def.get_rules());
             initialized_dfa_ = false;
+ return rules_.add(state, lexer_def.get_rules());
         }
 
         // interface for pattern definition management
@@ -328,7 +336,7 @@
 
         // Register a semantic action with the given id
         template <typename F>
- void add_action(id_type id, std::size_t state, F act)
+ void add_action(id_type unique_id, std::size_t state, F act)
         {
             // If you get compilation errors below stating value_type not being
             // a member of boost::fusion::unused_type, then you are probably
@@ -342,9 +350,6 @@
             if (actions_.size() <= state)
                 actions_.resize(state + 1);
 
- std::size_t unique_id = rules_.retrieve_id(state, id);
- BOOST_ASSERT(boost::lexer::npos != unique_id);
-
             value_type& actions (actions_[state]);
             if (actions.size() <= unique_id)
                 actions.resize(unique_id + 1);
@@ -371,8 +376,9 @@
     private:
         // lexertl specific data
         mutable boost::lexer::basic_state_machine<char_type> state_machine_;
- boost::lexer::basic_rules<char_type> rules_;
+ std::size_t unique_ids_;
         boost::lexer::regex_flags flags_;
+ boost::lexer::basic_rules<char_type> rules_;
 
         typename Functor::semantic_actions_type actions_;
         mutable bool initialized_dfa_;

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -52,6 +52,7 @@
                 Iterator const&, Iterator&, Iterator const&, std::size_t&);
 
             typedef unused_type semantic_actions_type;
+ typedef unused_type get_state_id_type;
 
             typedef detail::wrap_action<unused_type, iterpair_type, static_data>
                 wrap_action_type;
@@ -95,25 +96,25 @@
             typedef typename base_type::char_type char_type;
             typedef typename base_type::semantic_actions_type
                 semantic_actions_type;
+ typedef std::size_t (*get_state_id_type)(char const*);
 
             // initialize the shared data
             template <typename IterData>
             static_data (IterData const& data_, Iterator& first_, Iterator const& last_)
- : base_type(data_, first_, last_), state(0)
+ : base_type(data_, first_, last_), state_(0)
+ , get_state_id_(data_.get_state_id_)
             {}
 
             std::size_t next(Iterator& end, std::size_t& unique_id)
             {
- return this->next_token(state, this->first, end, this->last
+ return this->next_token(state_, this->first, end, this->last
                   , unique_id);
             }
 
- std::size_t& get_state() { return state; }
+ std::size_t& get_state() { return state_; }
             void set_state_name (char_type const* new_state)
             {
- this->rules.state(new_state);
- for (std::size_t state_id = 0;
- state_id < sizeof(lexer_state_names)/sizeof(lexer_state_names[0]); ++state_id)
+ std::size_t state_id = get_state_id_(new_state);
 
                 // if the following assertion fires you've probably been using
                 // a lexer state name which was not defined in your token
@@ -121,10 +122,11 @@
                 BOOST_ASSERT(state_id != boost::lexer::npos);
 
                 if (state_id != boost::lexer::npos)
- state = state_id;
+ state_ = state_id;
             }
 
- std::size_t state;
+ std::size_t state_;
+ get_state_id_type get_state_id_;
         };
 
         ///////////////////////////////////////////////////////////////////////
@@ -143,6 +145,7 @@
             typedef boost::function<functor_type> functor_wrapper_type;
             typedef std::vector<std::vector<functor_wrapper_type> >
                 semantic_actions_type;
+ typedef typename base_type::get_state_id_type get_state_id_type;
 
             typedef detail::wrap_action<functor_wrapper_type
               , iterpair_type, static_data> wrap_action_type;
@@ -150,8 +153,7 @@
             template <typename IterData>
             static_data (IterData const& data_, Iterator& first_, Iterator const& last_)
               : base_type(data_, first_, last_)
- , actions(data_.actions_), state_names_(data_.state_names_)
- , state_count_(data_.state_count_) {}
+ , actions_(data_.actions_) {}
 
             // invoke attached semantic actions, if defined
             bool invoke_actions(std::size_t state, std::size_t id
@@ -172,8 +174,6 @@
             }
 
             semantic_actions_type const& actions_;
- std::size_t const state_count_;
- const char* const* state_names_;
         };
     }
 
@@ -292,7 +292,7 @@
                 for (std::size_t i = 0; i < 10 && it != data.last; ++it, ++i)
                     next += *it;
 
- std::cerr << "Not matched, in state: " << data.state
+ std::cerr << "Not matched, in state: " << data.get_state()
                           << ", lookahead: >" << next << "<" << std::endl;
 #endif
                 return result = result_type(0);
@@ -313,7 +313,7 @@
                     next += *it;
 
                 std::cerr << "Matched: " << id << ", in state: "
- << data.state << ", string: >"
+ << data.get_state() << ", string: >"
                           << std::basic_string<char_type>(data.first, end) << "<"
                           << ", lookahead: >" << next << "<" << std::endl;
             }

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -14,49 +14,50 @@
 #include <boost/spirit/home/lex/lexer/lexertl/token.hpp>
 #include <boost/spirit/home/lex/lexer/lexertl/static_functor.hpp>
 #include <boost/spirit/home/lex/lexer/lexertl/iterator.hpp>
+#include <boost/spirit/home/lex/lexer/lexertl/unique_id.hpp>
 #if defined(BOOST_SPIRIT_DEBUG)
 #include <boost/spirit/home/support/detail/lexer/debug.hpp>
 #endif
+#include <boost/algorithm/string/predicate.hpp>
 
 namespace boost { namespace spirit { namespace lex { namespace lexertl
 {
     ///////////////////////////////////////////////////////////////////////////
- // forward declaration only
- namespace static_
- {
- // Both items, the table of names and the function to return the next
- // token have to be generated using the function generate_static().
- //
- // This is a forward declaration for the generated static table of
- // valid state names
- extern char const* const lexer_state_names[];
- extern std::size_t const lexer_state_count;
-
- // This is the forward declaration of the generated function to be
- // called to get the next token.
- template <typename Iterator>
- std::size_t next_token (std::size_t& state_, Iterator const& start_,
- Iterator &current_, Iterator const& end_);
- }
-
- ///////////////////////////////////////////////////////////////////////////
     // static_token_set
     ///////////////////////////////////////////////////////////////////////////
- template <typename Token, typename Iterator = typename Token::iterator_type>
+ template <typename Token
+ , typename LexerTables = static_::lexer
+ , typename Iterator = typename Token::iterator_type>
     class static_token_set
     {
     protected:
         typedef typename boost::detail::iterator_traits<Iterator>::value_type
             char_type;
         typedef std::basic_string<char_type> string_type;
+ typedef LexerTables tables_type;
+
+ static std::size_t get_state_id(char const* state)
+ {
+ for (std::size_t i = 0; i < tables_type::state_count(); ++i)
+ {
+ if (boost::equals(tables_type::state_name(i), state))
+ return i;
+ }
+ return ~0;
+ }
 
     public:
         typedef Token token_type;
         typedef typename Token::id_type id_type;
 
+ static_token_set(unsigned int flags = 0) {}
+
         // interface for token definition management
- void add_token (char_type const* state, string_type const& tokendef
- , std::size_t token_id) {}
+ std::size_t add_token (char_type const* state
+ , string_type const& tokendef, std::size_t token_id)
+ {
+ return unique_id<id_type>::get();
+ }
 
         // interface for pattern definition management
         void add_pattern (char_type const* state, string_type const& name
@@ -66,11 +67,11 @@
 
         std::size_t add_state(char_type const* state)
         {
- return 0;
+ return get_state_id(state);
         }
         string_type initial_state() const
         {
- return lex::lexertl::static_::lexer_state_names[0];
+ return tables_type::state_name(0);
         }
     };
 
@@ -104,6 +105,7 @@
     // template parameters:
     // Token The type of the tokens to be returned from the
     // exposed token iterator.
+ // LexerTables See explanations below.
     // Iterator The type of the iterator used to access the
     // underlying character stream.
     // Functor The type of the InputPolicy to use to instantiate
@@ -113,6 +115,31 @@
     // with this lexer type. This is used for the
     // token_set typedef described above only.
     //
+ // Additionally, this implementation of a static lexer has a template
+ // parameter LexerTables allowing to customize the static lexer tables
+ // to be used. The LexerTables is expected to be a type exposing
+ // the following functions:
+ //
+ // static std::size_t const state_count()
+ //
+ // This function needs to return the number of lexer states
+ // contained in the table returned from the state_names()
+ // function.
+ //
+ // static char const* const* state_names()
+ //
+ // This function needs to return a pointer to a table of
+ // names of all lexer states. The table needs to have as
+ // many entries as the state_count() function returns
+ //
+ // template<typename Iterator>
+ // std::size_t next(std::size_t &start_state_, Iterator const& start_
+ // , Iterator &start_token_, Iterator const& end_
+ // , std::size_t& unique_id_);
+ //
+ // This function is expected to return the next matched
+ // token from the underlying input stream.
+ //
     ///////////////////////////////////////////////////////////////////////////
 
     ///////////////////////////////////////////////////////////////////////////
@@ -130,9 +157,11 @@
     //
     ///////////////////////////////////////////////////////////////////////////
     template <typename Token = token<>
+ , typename LexerTables = static_::lexer
       , typename Iterator = typename Token::iterator_type
       , typename Functor = static_functor<Token, Iterator, mpl::false_>
- , typename TokenSet = lex::token_set<static_token_set<Token, Iterator> > >
+ , typename TokenSet =
+ lex::token_set<static_token_set<Token, LexerTables, Iterator> > >
     class static_lexer
     {
     public:
@@ -159,22 +188,22 @@
         {
             typename Functor::next_token_functor next_;
             typename Functor::semantic_actions_type const& actions_;
- std::size_t const state_count_;
- const char* const* state_names_;
+ std::size_t (*get_state_id_)(char const*);
         };
 
- public:
- // Return the start iterator usable for iterating over the generated
- // tokens, the Functor F is called to match the next token from the
- // input.
- template <typename F>
- iterator_type begin(Iterator& first, Iterator const& last, F next) const
- {
- iterator_data_type iterator_data = { next, actions
- , static_::lexer_state_count, static_::lexer_state_names };
- return iterator_type(iterator_data, first, last);
+ typedef LexerTables tables_type;
+
+ static std::size_t get_state_id(char const* state)
+ {
+ for (std::size_t i = 0; i < tables_type::state_count(); ++i)
+ {
+ if (boost::equals(tables_type::state_name(i), state))
+ return i;
+ }
+ return ~0;
         }
 
+ public:
         // Return the start iterator usable for iterating over the generated
         // tokens, the generated function next_token(...) is called to match
         // the next token from the input.
@@ -182,8 +211,7 @@
         iterator_type begin(Iterator_& first, Iterator_ const& last) const
         {
             iterator_data_type iterator_data =
- { &lex::lexertl::static_::next_token<Iterator_>, actions,
- static_::lexer_state_count, static_::lexer_state_names };
+ { &tables_type::next<Iterator_>, actions_, get_state_id };
             return iterator_type(iterator_data, first, last);
         }
 
@@ -200,11 +228,20 @@
 
     public:
         // interface for token definition management
- void add_token (char_type const* state, char_type tokendef
- , std::size_t token_id) {}
- void add_token (char_type const* state, string_type const& tokendef
- , std::size_t token_id) {}
- void add_token(char_type const* state, token_set& tokset) {}
+ std::size_t add_token (char_type const* state, char_type tokendef
+ , std::size_t token_id)
+ {
+ return unique_id<id_type>::get();
+ }
+ std::size_t add_token (char_type const* state, string_type const& tokendef
+ , std::size_t token_id)
+ {
+ return unique_id<id_type>::get();
+ }
+ std::size_t add_token(char_type const* state, token_set& tokset)
+ {
+ return unique_id<id_type>::get();
+ }
 
         // interface for pattern definition management
         void add_pattern (char_type const* state, string_type const& name
@@ -214,16 +251,16 @@
 
         std::size_t add_state(char_type const* state)
         {
- return 0;
+ return get_state_id(state);
         }
         string_type initial_state() const
         {
- return lex::lexertl::static_::lexer_state_names[0];
+ return tables_type::state_name(0);
         }
 
         // register a semantic action with the given id
         template <typename F>
- void add_action(id_type id, std::size_t state, F act)
+ void add_action(id_type unique_id, std::size_t state, F act)
         {
             // If you get compilation errors below stating value_type not being
             // a member of boost::fusion::unused_type, then you are probably
@@ -232,14 +269,22 @@
             // lexer (instead of the static_lexer class).
             typedef typename Functor::semantic_actions_type::value_type
                 value_type;
+ typedef typename Functor::wrap_action_type wrapper_type;
+
+ if (actions_.size() <= state)
+ actions_.resize(state + 1);
+
+ value_type& actions (actions_[state]);
+ if (actions.size() <= unique_id)
+ actions.resize(unique_id + 1);
 
- actions.insert(value_type(std::make_pair(id, state), act));
+ actions[unique_id] = wrapper_type::call(act);
         }
 
         bool init_dfa() const { return true; }
 
     private:
- typename Functor::semantic_actions_type actions;
+ typename Functor::semantic_actions_type actions_;
     };
 
     ///////////////////////////////////////////////////////////////////////////
@@ -265,16 +310,19 @@
     //
     ///////////////////////////////////////////////////////////////////////////
     template <typename Token = token<>
+ , typename LexerTables = static_::lexer
       , typename Iterator = typename Token::iterator_type
       , typename Functor = static_functor<Token, Iterator, mpl::true_>
- , typename TokenSet = lex::token_set<static_token_set<Token, Iterator> > >
+ , typename TokenSet =
+ lex::token_set<static_token_set<Token, LexerTables, Iterator> > >
     class static_actor_lexer
- : public static_lexer<Token, Iterator, Functor, TokenSet>
+ : public static_lexer<Token, LexerTables, Iterator, Functor, TokenSet>
     {
     protected:
         // Lexer instances can be created by means of a derived class only.
         static_actor_lexer(unsigned int flags)
- : static_lexer<Token, Iterator, Functor, TokenSet>(flags) {}
+ : static_lexer<Token, LexerTables, Iterator, Functor, TokenSet>(flags)
+ {}
     };
 
 }}}}

Added: trunk/boost/spirit/home/lex/lexer/lexertl/unique_id.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/unique_id.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -0,0 +1,39 @@
+// Copyright (c) 2001-2009 Hartmut Kaiser
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#if !defined(BOOST_SPIRIT_LEX_UNIQUE_ID_MAY_24_2009_0313PM)
+#define BOOST_SPIRIT_LEX_UNIQUE_ID_MAY_24_2009_0313PM
+
+#if defined(_MSC_VER)
+#pragma once
+#endif
+
+namespace boost { namespace spirit { namespace lex { namespace lexertl
+{
+ ///////////////////////////////////////////////////////////////////////////
 + // The unique_id template needs to be specialized for any non-default token
+ // id type used by a custom token type. It needs to expose a function
+ // 'static Idtype get()' returning the next available unique id each time
+ // it is called.
+ template <typename Idtype>
+ struct unique_id;
+
+ ///////////////////////////////////////////////////////////////////////////
 + // Default specialization of the unique_id template returning the next
 + // available unique id.
+ template <>
+ struct unique_id<std::size_t>
+ {
+ static std::size_t get()
+ {
+ static std::size_t unique_id_ = 0;
+ return unique_id_++;
+ }
+ };
+
+}}}}
+
+#endif
+

Modified: trunk/boost/spirit/home/lex/lexer/string_token_def.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/string_token_def.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/string_token_def.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -66,16 +66,18 @@
             typedef typename LexerDef::id_type id_type;
             if (~0U == id_)
                 id_ = next_id<id_type>::get();
- lexdef.add_token (state.c_str(), str_, id_);
+ unique_id_ = lexdef.add_token (state.c_str(), str_, id_);
         }
 
         template <typename LexerDef>
         void add_actions(LexerDef& lexdef) const {}
 
         std::size_t id() const { return id_; }
+ std::size_t unique_id() const { return unique_id_; }
 
         string_type str_;
         mutable std::size_t id_;
+ mutable std::size_t unique_id_;
     };
 
     ///////////////////////////////////////////////////////////////////////////

Modified: trunk/boost/spirit/home/lex/lexer/token_def.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/token_def.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/token_def.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -125,10 +125,10 @@
 
                 // If the following assertion fires you probably forgot to
                 // associate this token definition with a lexer instance.
- BOOST_ASSERT((std::size_t)(~0) != token_state);
+ BOOST_ASSERT((std::size_t)(~0) != token_state_);
 
                 token_type &t = *first;
- if (token_id == t.id() && token_state == t.state()) {
+ if (token_id_ == t.id() && token_state_ == t.state()) {
                     qi::detail::assign_to(t, attr);
                     ++first;
                     return true;
@@ -156,16 +156,20 @@
             // is not possible. Please create a separate token_def instance
             // from the same regular expression for each lexer state it needs
             // to be associated with.
- BOOST_ASSERT(~0 == token_state || state_id == token_state);
+ BOOST_ASSERT(~0 == token_state_ || state_id == token_state_);
 
- token_state = state_id;
- if (0 == token_id)
- token_id = next_id<Idtype>::get();
-
- if (0 == def.which())
- lexdef.add_token(state.c_str(), get<string_type>(def), token_id);
- else
- lexdef.add_token(state.c_str(), get<char_type>(def), token_id);
+ token_state_ = state_id;
+ if (0 == token_id_)
+ token_id_ = next_id<Idtype>::get();
+
+ if (0 == def_.which()) {
+ unique_id_ = lexdef.add_token(state.c_str()
+ , get<string_type>(def_), token_id_);
+ }
+ else {
+ unique_id_ = lexdef.add_token(state.c_str()
+ , get<char_type>(def_), token_id_);
+ }
         }
 
         template <typename LexerDef>
@@ -179,48 +183,52 @@
         // Lex interface: constructing token definitions
         token_def()
           : proto_base_type(terminal_type::make(alias()))
- , def('\0'), token_id(), token_state(~0) {}
+ , def_('\0'), token_id_(), unique_id_(~0), token_state_(~0) {}
 
         explicit token_def(char_type def_, Idtype id_ = Idtype())
           : proto_base_type(terminal_type::make(alias()))
- , def(def_), token_id(Idtype() == id_ ? def_ : id_)
- , token_state(~0) {}
+ , def_(def_), token_id_(Idtype() == id_ ? def_ : id_)
+ , unique_id_(~0), token_state_(~0) {}
 
         explicit token_def(string_type const& def_, Idtype id_ = Idtype())
           : proto_base_type(terminal_type::make(alias()))
- , def(def_), token_id(id_), token_state(~0) {}
+ , def_(def_), token_id_(id_), unique_id_(~0), token_state_(~0) {}
 
         template <typename String>
         token_def& operator= (String const& definition)
         {
- def = definition;
- token_id = Idtype();
- token_state = std::size_t(~0);
+ def_ = definition;
+ token_id_ = Idtype();
+ unique_id_ = std::size_t(~0);
+ token_state_ = std::size_t(~0);
             return *this;
         }
         token_def& operator= (token_def const& rhs)
         {
- def = rhs.def;
- token_id = rhs.token_id;
- token_state = rhs.token_state;
+ def_ = rhs.def_;
+ token_id_ = rhs.token_id_;
+ unique_id_ = rhs.unique_id_;
+ token_state_ = rhs.token_state_;
             return *this;
         }
 
         // general accessors
- Idtype id() const { return token_id; }
- void id(Idtype id) { token_id = id; }
+ Idtype id() const { return token_id_; }
+ void id(Idtype id) { token_id_ = id; }
+ std::size_t unique_id() const { return unique_id_; }
 
         string_type definition() const
         {
- return (0 == def.which())
- ? get<string_type>(def) : string_type(1, get<char_type>(def));
+ return (0 == def_.which())
+ ? get<string_type>(def_) : string_type(1, get<char_type>(def_));
         }
- std::size_t state() const { return token_state; }
+ std::size_t state() const { return token_state_; }
 
     private:
- variant<string_type, char_type> def;
- mutable Idtype token_id;
- mutable std::size_t token_state;
+ variant<string_type, char_type> def_;
+ mutable Idtype token_id_;
+ mutable std::size_t unique_id_;
+ mutable std::size_t token_state_;
     };
 
 }}}

Modified: trunk/boost/spirit/home/lex/reference.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/reference.hpp (original)
+++ trunk/boost/spirit/home/lex/reference.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -57,6 +57,10 @@
         {
             return this->ref.get().id();
         }
+ std::size_t unique_id() const
+ {
+ return this->ref.get().unique_id();
+ }
         std::size_t state() const
         {
             return this->ref.get().state();

Modified: trunk/boost/spirit/home/support/detail/lexer/generator.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/generator.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/generator.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -148,7 +148,7 @@
         const typename rules::string_deque_deque &regexes_ =
             rules_.regexes ();
         const typename rules::id_vector_deque &ids_ = rules_.ids ();
- std::size_t unique_id_ = 0;
+ const typename rules::id_vector_deque &unique_ids_ = rules_.unique_ids ();
         const typename rules::id_vector_deque &states_ = rules_.states ();
         typename rules::string_deque::const_iterator regex_iter_ =
             regexes_[state_].begin ();
@@ -156,6 +156,8 @@
             regexes_[state_].end ();
         typename rules::id_vector::const_iterator ids_iter_ =
             ids_[state_].begin ();
+ typename rules::id_vector::const_iterator unique_ids_iter_ =
+ unique_ids_[state_].begin ();
         typename rules::id_vector::const_iterator states_iter_ =
             states_[state_].begin ();
         const typename rules::string &regex_ = *regex_iter_;
@@ -171,12 +173,13 @@
             seen_BOL_assertion_, seen_EOL_assertion_);
 
         detail::node *root_ = parser::parse (regex_.c_str (),
- regex_.c_str () + regex_.size (), *ids_iter_, unique_id_++,
+ regex_.c_str () + regex_.size (), *ids_iter_, *unique_ids_iter_,
             *states_iter_, rules_.flags (), rules_.locale (), node_ptr_vector_,
             macromap_, token_map_, seen_BOL_assertion_, seen_EOL_assertion_);
 
         ++regex_iter_;
         ++ids_iter_;
+ ++unique_ids_iter_;
         ++states_iter_;
         tree_vector_.push_back (root_);
 
@@ -187,13 +190,14 @@
             const typename rules::string &regex_ = *regex_iter_;
 
             root_ = parser::parse (regex_.c_str (),
- regex_.c_str () + regex_.size (), *ids_iter_, unique_id_++,
+ regex_.c_str () + regex_.size (), *ids_iter_, *unique_ids_iter_,
                 *states_iter_, rules_.flags (), rules_.locale (),
                 node_ptr_vector_, macromap_, token_map_,
                 seen_BOL_assertion_, seen_EOL_assertion_);
             tree_vector_.push_back (root_);
             ++regex_iter_;
             ++ids_iter_;
+ ++unique_ids_iter_;
             ++states_iter_;
         }
 
@@ -282,7 +286,7 @@
                 locale_, node_ptr_vector_, macromap_, token_map_,
                 seen_BOL_assertion_, seen_EOL_assertion_);
             macro_iter_pair map_iter_ = macromap_.
- insert (macro_pair (name_, (detail::node const*)0));
+ insert (macro_pair (name_, 0));
 
             map_iter_.first->second = node_;
         }
@@ -506,7 +510,7 @@
                     }
                     else
                     {
- iter_ = lhs_->insert (++iter_, (charset*)0);
+ iter_ = lhs_->insert (++iter_, 0);
                         *iter_ = overlap_.release ();
 
                         // VC++ 6 Hack:
@@ -648,7 +652,7 @@
                     }
                     else
                     {
- iter_ = lhs_->insert (++iter_, (equivset*)0);
+ iter_ = lhs_->insert (++iter_, 0);
                         *iter_ = overlap_.release ();
 
                         // VC++ 6 Hack:

Modified: trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -425,6 +425,7 @@
         tree_node_stack_.top () = node_ptr_vector_->back ();
     }
 
+ // This is one of the most mind bending routines in this code...
     static void repeatn (const bool greedy_, const token &token_,
         node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
     {
@@ -449,9 +450,7 @@
 
             for (std::size_t i_ = 2; i_ < top_; ++i_)
             {
- node *temp_ = prev_->copy (node_ptr_vector_);
-
- curr_ = temp_;
+ curr_ = prev_->copy (node_ptr_vector_);
                 tree_node_stack_.push (0);
                 tree_node_stack_.top () = prev_;
                 sequence (node_ptr_vector_, tree_node_stack_);
@@ -462,9 +461,7 @@
             {
                 if (token_._min > 1)
                 {
- node *temp_ = prev_->copy (node_ptr_vector_);
-
- curr_ = temp_;
+ curr_ = prev_->copy (node_ptr_vector_);
                     tree_node_stack_.push (0);
                     tree_node_stack_.top () = prev_;
                     sequence (node_ptr_vector_, tree_node_stack_);
@@ -476,19 +473,15 @@
                     tree_node_stack_.push (0);
                     tree_node_stack_.top () = prev_;
                     optional (greedy_, node_ptr_vector_, tree_node_stack_);
-
- node *temp_ = tree_node_stack_.top ();
+ prev_ = tree_node_stack_.top ();
 
                     tree_node_stack_.pop ();
- prev_ = temp_;
 
                     const std::size_t count_ = token_._max - token_._min;
 
                     for (std::size_t i_ = 1; i_ < count_; ++i_)
                     {
- node *temp_ = prev_->copy (node_ptr_vector_);
-
- curr_ = temp_;
+ curr_ = prev_->copy (node_ptr_vector_);
                         tree_node_stack_.push (0);
                         tree_node_stack_.top () = prev_;
                         sequence (node_ptr_vector_, tree_node_stack_);
@@ -500,10 +493,7 @@
                     tree_node_stack_.push (0);
                     tree_node_stack_.top () = prev_;
                     zero_or_more (greedy_, node_ptr_vector_, tree_node_stack_);
-
- node *temp_ = tree_node_stack_.top ();
-
- prev_ = temp_;
+ prev_ = tree_node_stack_.top ();
                     tree_node_stack_.pop ();
                 }
             }

Modified: trunk/boost/spirit/home/support/detail/lexer/rules.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/rules.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/rules.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -82,10 +82,12 @@
     typedef std::map<string, std::size_t> string_size_t_map;
     typedef std::pair<string, std::size_t> string_size_t_pair;
     typedef std::pair<std::size_t, std::size_t> unique_id_key;
- typedef std::map<unique_id_key, std::size_t> unique_id_map;
 
- basic_rules (const regex_flags flags_ = dot_not_newline) :
- _flags (flags_)
+ basic_rules (const regex_flags flags_ = dot_not_newline,
+ std::size_t (*counter_ptr_)() = 0) :
+ _flags (flags_),
+ _counter (0),
+ _counter_ptr (counter_ptr_)
     {
         add_state (initial ());
     }
@@ -97,6 +99,7 @@
         _macroset.clear ();
         _regexes.clear ();
         _ids.clear ();
+ _unique_ids.clear ();
         _states.clear ();
         _flags = dot_not_newline;
         _locale = std::locale ();
@@ -111,6 +114,7 @@
         {
             _regexes[state_].clear ();
             _ids[state_].clear ();
+ _unique_ids[state_].clear ();
             _states[state_].clear ();
         }
     }
@@ -125,6 +129,11 @@
         return _flags;
     }
 
+ std::size_t next_unique_id ()
+ {
+ return _counter_ptr ? _counter_ptr() : _counter++;
+ }
+
     std::locale imbue (std::locale &locale_)
     {
         std::locale loc_ = _locale;
@@ -173,6 +182,7 @@
         {
             _regexes.push_back (string_deque ());
             _ids.push_back (id_vector ());
+ _unique_ids.push_back (id_vector ());
             _states.push_back (id_vector ());
 
             if (string (name_) != initial ())
@@ -237,89 +247,102 @@
 
     std::size_t add (const string &regex_, const std::size_t id_)
     {
+ const std::size_t counter_ = next_unique_id();
+
         check_for_invalid_id (id_);
         _regexes[0].push_back (regex_);
         _ids[0].push_back (id_);
+ _unique_ids[0].push_back (counter_);
         _states[0].push_back (0);
- return _regexes[0].size () - 1;
+
+ return counter_;
     }
 
- void add (const CharT *curr_state_, const CharT *regex_,
- const CharT *new_state_, id_vector *id_vec_ = 0)
+ std::size_t add (const CharT *curr_state_, const CharT *regex_,
+ const CharT *new_state_)
     {
- add (curr_state_, string (regex_), new_state_, id_vec_);
+ return add (curr_state_, string (regex_), new_state_);
     }
 
- void add (const CharT *curr_state_, const CharT *regex_start_,
- const CharT *regex_end_, const CharT *new_state_,
- id_vector *id_vec_ = 0)
+ std::size_t add (const CharT *curr_state_, const CharT *regex_start_,
+ const CharT *regex_end_, const CharT *new_state_)
     {
- add (curr_state_, string (regex_start_, regex_end_),
- new_state_, id_vec_);
+ return add (curr_state_, string (regex_start_, regex_end_),
+ new_state_);
     }
 
- void add (const CharT *curr_state_, const string &regex_,
- const CharT *new_state_, id_vector *id_vec_ = 0)
+ std::size_t add (const CharT *curr_state_, const string &regex_,
+ const CharT *new_state_)
     {
- add (curr_state_, regex_, 0, new_state_, false, id_vec_);
+ return add (curr_state_, regex_, 0, new_state_, false);
     }
 
- void add (const CharT *curr_state_, const CharT *regex_,
- const std::size_t id_, const CharT *new_state_, id_vector *id_vec_ = 0)
+ std::size_t add (const CharT *curr_state_, const CharT *regex_,
+ const std::size_t id_, const CharT *new_state_)
     {
- add (curr_state_, string (regex_), id_, new_state_, id_vec_);
+ return add (curr_state_, string (regex_), id_, new_state_);
     }
 
- void add (const CharT *curr_state_, const CharT *regex_start_,
+ std::size_t add (const CharT *curr_state_, const CharT *regex_start_,
         const CharT *regex_end_, const std::size_t id_,
- const CharT *new_state_, id_vector *id_vec_ = 0)
+ const CharT *new_state_)
     {
- add (curr_state_, string (regex_start_, regex_end_), id_,
- new_state_, id_vec_);
+ return add (curr_state_, string (regex_start_, regex_end_), id_,
+ new_state_);
     }
 
- void add (const CharT *curr_state_, const string &regex_,
- const std::size_t id_, const CharT *new_state_, id_vector *id_vec_ = 0)
+ std::size_t add (const CharT *curr_state_, const string &regex_,
+ const std::size_t id_, const CharT *new_state_)
     {
- add (curr_state_, regex_, id_, new_state_, true, id_vec_);
+ return add (curr_state_, regex_, id_, new_state_, true);
     }
 
- void add (const CharT *curr_state_, const basic_rules &rules_,
- id_vector *id_vec_ = 0)
+ std::size_t add (const CharT *curr_state_, const basic_rules &rules_)
     {
+ const std::size_t counter_ = next_unique_id();
+ const string_pair_deque &macros_ = rules_.macrodeque ();
+ typename string_pair_deque::const_iterator macro_iter_ =
+ macros_.begin ();
+ typename string_pair_deque::const_iterator macro_end_ =
+ macros_.end ();
         const string_deque_deque &regexes_ = rules_.regexes ();
         const id_vector_deque &ids_ = rules_.ids ();
+ const id_vector_deque &unique_ids_ = rules_.unique_ids ();
         typename string_deque_deque::const_iterator state_regex_iter_ =
             regexes_.begin ();
         typename string_deque_deque::const_iterator state_regex_end_ =
             regexes_.end ();
         typename id_vector_deque::const_iterator state_id_iter_ =
             ids_.begin ();
+ typename id_vector_deque::const_iterator state_uid_iter_ =
+ unique_ids_.begin();
         typename string_deque::const_iterator regex_iter_;
         typename string_deque::const_iterator regex_end_;
         typename id_vector::const_iterator id_iter_;
- id_vector *temp_id_vec_ = id_vec_;
+ typename id_vector::const_iterator uid_iter_;
+
+ for (; macro_iter_ != macro_end_; ++macro_iter_)
+ {
+ add_macro (macro_iter_->first.c_str (),
+ macro_iter_->second.c_str ());
+ }
 
         for (; state_regex_iter_ != state_regex_end_; ++state_regex_iter_)
         {
             regex_iter_ = state_regex_iter_->begin ();
             regex_end_ = state_regex_iter_->end ();
             id_iter_ = state_id_iter_->begin ();
+ uid_iter_ = state_uid_iter_->begin ();
 
- for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_)
+ for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_,
+ ++uid_iter_)
             {
- add (curr_state_, *regex_iter_, *id_iter_, detail::dot<CharT>::str(),
- temp_id_vec_);
-
- if (temp_id_vec_)
- {
- // As suggested by Hartmut, only fill the id_vec_ once.
- // The dfa sizes can be examined at the end to get a range
- // of ids.
- temp_id_vec_ = 0;
- }
+ add (curr_state_, *regex_iter_, *id_iter_,
+ detail::dot<CharT>::str(), true, *uid_iter_);
             }
         }
+
+ return counter_;
     }
 
     const string_size_t_map &statemap () const
@@ -342,6 +365,11 @@
         return _ids;
     }
 
+ const id_vector_deque &unique_ids () const
+ {
+ return _unique_ids;
+ }
+
     const id_vector_deque &states () const
     {
         return _states;
@@ -370,43 +398,27 @@
         return detail::initial<CharT>::str ();
     }
 
- std::size_t retrieve_id (std::size_t state, std::size_t id) const
- {
- unique_id_key key (state, id);
- typename unique_id_map::const_iterator it = _unique_ids.find (key);
-
- if (it == _unique_ids.end ())
- {
- return npos;
- }
-
- return (*it).second;
- }
-
 private:
     string_size_t_map _statemap;
     string_pair_deque _macrodeque;
     string_set _macroset;
     string_deque_deque _regexes;
     id_vector_deque _ids;
+ id_vector_deque _unique_ids;
     id_vector_deque _states;
     regex_flags _flags;
+ std::size_t _counter;
+ std::size_t (*_counter_ptr)();
     std::locale _locale;
     string_deque _lexer_state_names;
- unique_id_map _unique_ids;
 
- void add (const CharT *curr_state_, const string &regex_,
+ std::size_t add (const CharT *curr_state_, const string &regex_,
         const std::size_t id_, const CharT *new_state_, const bool check_,
- id_vector *id_vec_ = 0)
+ const std::size_t uid_ = npos)
     {
         const bool star_ = *curr_state_ == '*' && *(curr_state_ + 1) == 0;
         const bool dot_ = *new_state_ == '.' && *(new_state_ + 1) == 0;
 
- if (id_vec_)
- {
- id_vec_->clear();
- }
-
         if (check_)
         {
             check_for_invalid_id (id_);
@@ -498,21 +510,33 @@
             }
         }
 
+ std::size_t first_counter_ = npos;
+
         for (std::size_t i_ = 0, size_ = states_.size (); i_ < size_; ++i_)
         {
             const std::size_t curr_ = states_[i_];
 
             _regexes[curr_].push_back (regex_);
             _ids[curr_].push_back (id_);
- _states[curr_].push_back (dot_ ? curr_ : new_);
 
- if (id_vec_)
+ if (uid_ == npos)
             {
- id_vec_->push_back (_regexes[curr_].size () - 1);
+ std::size_t counter_ = next_unique_id();
+ if (first_counter_ == npos)
+ first_counter_ = counter_;
+ _unique_ids[curr_].push_back (counter_);
+ }
+ else
+ {
+ if (first_counter_ == npos)
+ first_counter_ = uid_;
+ _unique_ids[curr_].push_back (uid_);
             }
 
- map_id (dot_ ? curr_ : new_, id_, _regexes[curr_].size () - 1);
+ _states[curr_].push_back (dot_ ? curr_ : new_);
         }
+
+ return first_counter_;
     }
 
     void validate (const CharT *name_) const
@@ -597,22 +621,6 @@
             break;
         }
     }
-
- bool map_id (std::size_t state, std::size_t id, std::size_t unique_id)
- {
- typedef typename unique_id_map::iterator iterator_type;
-
- unique_id_key key (state, id);
- iterator_type it = _unique_ids.find (key);
- if (it != _unique_ids.end ())
- {
- (*it).second = unique_id;
- return false;
- }
-
- typedef typename unique_id_map::value_type value_type;
- return _unique_ids.insert (value_type (key, unique_id)).second;
- }
 };
 
 typedef basic_rules<char> rules;

Modified: trunk/libs/spirit/example/lex/static_lexer/Jamfile
==============================================================================
--- trunk/libs/spirit/example/lex/static_lexer/Jamfile (original)
+++ trunk/libs/spirit/example/lex/static_lexer/Jamfile 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -10,3 +10,6 @@
 exe word_count_generate : word_count_generate.cpp ;
 exe word_count_static : word_count_static.cpp ;
 
+exe word_count_lexer_generate : word_count_lexer_generate.cpp ;
+exe word_count_lexer_static : word_count_lexer_static.cpp ;
+

Modified: trunk/libs/spirit/example/lex/static_lexer/word_count_generate.cpp
==============================================================================
--- trunk/libs/spirit/example/lex/static_lexer/word_count_generate.cpp (original)
+++ trunk/libs/spirit/example/lex/static_lexer/word_count_generate.cpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -36,8 +36,11 @@
     std::ofstream out(argc < 2 ? "word_count_static.hpp" : argv[1]);
 
     // invoke the generator, passing the token definition, the output stream
- // and the name prefix of the tokenizing function to be generated
- char const* function_name = (argc < 3 ? "" : argv[2]);
- return lexertl::generate_static(word_count, out, function_name) ? 0 : -1;
+ // and the name suffix of the tables and functions to be generated
+ //
+ // The suffix "wc" used below results in a type lexertl::static_::lexer_wc
+ // to be generated, which needs to be passed as a template parameter to the
+ // lexertl::static_lexer template (see word_count_static.cpp).
+ return lexertl::generate_static(word_count, out, "wc") ? 0 : -1;
 }
 //]

Added: trunk/libs/spirit/example/lex/static_lexer/word_count_lexer_generate.cpp
==============================================================================
--- (empty file)
+++ trunk/libs/spirit/example/lex/static_lexer/word_count_lexer_generate.cpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -0,0 +1,46 @@
+// Copyright (c) 2001-2009 Hartmut Kaiser
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// The purpose of this example is to show how it is possible to use a lexer
+// token definition for two purposes:
+//
+// . To generate C++ code implementing a static lexical analyzer allowing
+// to recognize all defined tokens (this file)
+// . To integrate the generated C++ lexer into the /Spirit/ framework.
+// (see the file: word_count_lexer_static.cpp)
+
+// #define BOOST_SPIRIT_LEXERTL_DEBUG
+
+#include <boost/config/warning_disable.hpp>
+#include <boost/spirit/include/lex_lexertl.hpp>
+#include <boost/spirit/include/lex_generate_static_lexertl.hpp>
+
+#include <fstream>
+
+#include "word_count_lexer_tokens.hpp"
+
+using namespace boost::spirit;
+using namespace boost::spirit::lex;
+
+///////////////////////////////////////////////////////////////////////////////
+//[wcl_static_generate_main
+int main(int argc, char* argv[])
+{
+ // create the lexer object instance needed to invoke the generator
+ word_count_lexer_tokens<lexertl::actor_lexer<> > word_count; // the token definition
+
+ // open the output file, where the generated tokenizer function will be
+ // written to
+ std::ofstream out(argc < 2 ? "word_count_lexer_static.hpp" : argv[1]);
+
+ // invoke the generator, passing the token definition, the output stream
+ // and the name suffix of the tables and functions to be generated
+ //
+ // The suffix "wcl" used below results in a type lexertl::static_::lexer_wcl
+ // to be generated, which needs to be passed as a template parameter to the
+ // lexertl::static_lexer template (see word_count_lexer_static.cpp).
+ return lexertl::generate_static(word_count, out, "wcl") ? 0 : -1;
+}
+//]

Added: trunk/libs/spirit/example/lex/static_lexer/word_count_lexer_static.cpp
==============================================================================
--- (empty file)
+++ trunk/libs/spirit/example/lex/static_lexer/word_count_lexer_static.cpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -0,0 +1,85 @@
+// Copyright (c) 2001-2009 Hartmut Kaiser
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// The purpose of this example is to show how it is possible to use a lexer
+// token definition for two purposes:
+//
+// . To generate C++ code implementing a static lexical analyzer allowing
+// to recognize all defined tokens
+// . To integrate the generated C++ lexer into the /Spirit/ framework.
+//
+
+// #define BOOST_SPIRIT_DEBUG
+// #define BOOST_SPIRIT_LEXERTL_DEBUG
+
+#include <boost/config/warning_disable.hpp>
+#include <boost/spirit/include/lex_static_lexertl.hpp>
+
+#include <iostream>
+#include <string>
+
+#include "../example.hpp"
+#include "word_count_lexer_tokens.hpp" // token definition
+#include "word_count_lexer_static.hpp" // generated tokenizer
+
+using namespace boost::spirit;
+using namespace boost::spirit::lex;
+
+///////////////////////////////////////////////////////////////////////////////
+//[wcl_static_main
+int main(int argc, char* argv[])
+{
+ // read input from the given file
+ std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
+
+ // Specifying 'omitted' as the token attribute type generates a token class
+ // not holding any token attribute at all (not even the iterator_range of the
+ // matched input sequence), thereby optimizing the token, the lexer, and
+ // possibly the parser implementation as much as possible.
+ //
+ // Specifying mpl::false_ as the 3rd template parameter generates a token
+ // type and an iterator, both holding no lexer state, allowing for even more
+ // aggressive optimizations.
+ //
+ // As a result the token instances contain the token ids as the only data
+ // member.
+ typedef lexertl::token<char const*, omitted, boost::mpl::false_> token_type;
+
+ // Define the lexer type to be used as the base class for our token
+ // definition.
+ //
+ // This is the only place where the code is different from an equivalent
+ // dynamic lexical analyzer. We use the `lexertl::static_lexer<>` instead of
+ // the `lexertl::lexer<>` as the base class for our token definition type.
+ //
+ // As we specified the suffix "wcl" while generating the static tables we
+ // need to pass the type lexertl::static_::lexer_wcl as the second template
+ // parameter below (see word_count_lexer_generate.cpp).
+ typedef lexertl::static_actor_lexer<
+ token_type, lexertl::static_::lexer_wcl
+ > lexer_type;
+
+ // create the lexer object instance needed to invoke the lexical analysis
+ word_count_lexer_tokens<lexer_type> word_count_lexer;
+
+ // tokenize the given string, all generated tokens are discarded
+ char const* first = str.c_str();
+ char const* last = &first[str.size()];
+ bool r = tokenize(first, last, word_count_lexer);
+
+ if (r) {
+ std::cout << "lines: " << word_count_lexer.l
+ << ", words: " << word_count_lexer.w
+ << ", characters: " << word_count_lexer.c
+ << "\n";
+ }
+ else {
+ std::string rest(first, last);
+ std::cout << "Lexical analysis failed\n" << "stopped at: \""
+ << rest << "\"\n";
+ }
+ return 0;
+}
+//]

Added: trunk/libs/spirit/example/lex/static_lexer/word_count_lexer_static.hpp
==============================================================================
--- (empty file)
+++ trunk/libs/spirit/example/lex/static_lexer/word_count_lexer_static.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -0,0 +1,145 @@
+// Copyright (c) 2008-2009 Ben Hanson
+// Copyright (c) 2008-2009 Hartmut Kaiser
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// Auto-generated by boost::lexer, do not edit
+
+#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_WCL_MAY_24_2009_18_48_18)
+#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_WCL_MAY_24_2009_18_48_18
+
+#include <boost/detail/iterator.hpp>
+#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
+
+// the generated table of state names and the tokenizer have to be
+// defined in the boost::spirit::lex::lexertl::static_ namespace
+namespace boost { namespace spirit { namespace lex { namespace lexertl { namespace static_ {
+
+// this table defines the names of the lexer states
+char const* const lexer_state_names_wcl[1] =
+{
+ "INITIAL"
+};
+
+// this variable defines the number of lexer states
+std::size_t const lexer_state_count_wcl = 1;
+
+// this function returns the next matched token
+template<typename Iterator>
+std::size_t next_token_wcl (std::size_t &start_state_, Iterator const& start_,
+ Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)
+{
+ enum {end_state_index, id_index, unique_id_index, state_index, bol_index,
+ eol_index, dead_state_index, dfa_offset};
+
+ static const std::size_t npos = static_cast<std::size_t>(~0);
+ static const std::size_t lookup_[256] = {
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 8, 7, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 8, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9 };
+ static const std::size_t dfa_alphabet_ = 10;
+ static const std::size_t dfa_[50] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 3, 4, 2, 1, 65536, 0, 0,
+ 0, 0, 0, 0, 0, 2, 1, 65537,
+ 1, 0, 0, 0, 0, 0, 0, 0,
+ 1, 65538, 2, 0, 0, 0, 0, 0,
+ 0, 0 };
+
+ if (start_token_ == end_) return 0;
+
+ const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
+ Iterator curr_ = start_token_;
+ bool end_state_ = *ptr_ != 0;
+ std::size_t id_ = *(ptr_ + id_index);
+ std::size_t uid_ = *(ptr_ + unique_id_index);
+ Iterator end_token_ = start_token_;
+
+ while (curr_ != end_)
+ {
+ std::size_t const state_ =
+ ptr_[lookup_[static_cast<unsigned char>(*curr_++)]];
+
+ if (state_ == 0) break;
+
+ ptr_ = &dfa_[state_ * dfa_alphabet_];
+
+ if (*ptr_)
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
+ end_token_ = curr_;
+ }
+ }
+
+ if (end_state_)
+ {
+ // return longest match
+ start_token_ = end_token_;
+ }
+ else
+ {
+ id_ = npos;
+ uid_ = npos;
+ }
+
+ unique_id_ = uid_;
+ return id_;
+}
+
+// this defines a generic accessor for the information above
+struct lexer_wcl
+{
+ static std::size_t const state_count()
+ {
+ return lexer_state_count_wcl;
+ }
+
+ static char const* const state_name(std::size_t idx)
+ {
+ return lexer_state_names_wcl[idx];
+ }
+
+ template<typename Iterator>
+ static std::size_t next(std::size_t &start_state_, Iterator const& start_
+ , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)
+ {
+ return next_token_wcl(start_state_, start_, start_token_, end_, unique_id_);
+ }
+};
+
+}}}}} // namespace boost::spirit::lex::lexertl::static_
+
+#endif

Added: trunk/libs/spirit/example/lex/static_lexer/word_count_lexer_tokens.hpp
==============================================================================
--- (empty file)
+++ trunk/libs/spirit/example/lex/static_lexer/word_count_lexer_tokens.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -0,0 +1,47 @@
+// Copyright (c) 2001-2009 Hartmut Kaiser
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#if !defined(SPIRIT_LEXER_EXAMPLE_WORD_COUNT_LEXER_TOKENS_FEB_10_2008_0739PM)
+#define SPIRIT_LEXER_EXAMPLE_WORD_COUNT_LEXER_TOKENS_FEB_10_2008_0739PM
+
+#include <boost/spirit/include/phoenix_operator.hpp>
+#include <boost/spirit/include/phoenix_statement.hpp>
+#include <boost/spirit/include/phoenix_algorithm.hpp>
+#include <boost/spirit/include/phoenix_core.hpp>
+
+///////////////////////////////////////////////////////////////////////////////
+// Token definition: We use the lexertl based lexer engine as the underlying
+// lexer type.
+//
+// Note, the token definition type is derived from the 'lexertl_actor_lexer'
+// template, which is necessary to be able to use lexer semantic actions.
+///////////////////////////////////////////////////////////////////////////////
+//[wcl_static_token_definition
+template <typename Lexer>
+struct word_count_lexer_tokens : boost::spirit::lex::lexer<Lexer>
+{
+ word_count_lexer_tokens()
+ : c(0), w(0), l(0)
+ , word("[^ \t\n]+") // define tokens
+ , eol("\n")
+ , any(".")
+ {
+ using boost::phoenix::ref;
+ using boost::phoenix::distance;
+
+ // associate tokens with the lexer
+ this->self
+ = word [++ref(w), ref(c) += distance(_1)]
+ | eol [++ref(c), ++ref(l)]
+ | any [++ref(c)]
+ ;
+ }
+
+ std::size_t c, w, l;
+ boost::spirit::lex::token_def<> word, eol, any;
+};
+//]
+
+#endif

Modified: trunk/libs/spirit/example/lex/static_lexer/word_count_static.cpp
==============================================================================
--- trunk/libs/spirit/example/lex/static_lexer/word_count_static.cpp (original)
+++ trunk/libs/spirit/example/lex/static_lexer/word_count_static.cpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -28,7 +28,6 @@
 
 #include "../example.hpp"
 #include "word_count_tokens.hpp" // token definition
-
 #include "word_count_static.hpp" // generated tokenizer
 
 using namespace boost::spirit;
@@ -85,7 +84,13 @@
     // This is the only place where the code is different from an equivalent
     // dynamic lexical analyzer. We use the `lexertl::static_lexer<>` instead of
 // the `lexertl::lexer<>` as the base class for our token definition type.
- typedef lexertl::static_lexer<token_type> lexer_type;
+ //
+ // As we specified the suffix "wc" while generating the static tables we
+ // need to pass the type lexertl::static_::lexer_wc as the second template
+ // parameter below (see word_count_generate.cpp).
+ typedef lexertl::static_lexer<
+ token_type, lexertl::static_::lexer_wc
+ > lexer_type;
 
     // Define the iterator type exposed by the lexer.
     typedef word_count_tokens<lexer_type>::iterator_type iterator_type;

Modified: trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp
==============================================================================
--- trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp (original)
+++ trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp 2009-05-24 20:31:48 EDT (Sun, 24 May 2009)
@@ -6,8 +6,8 @@
 
 // Auto-generated by boost::lexer, do not edit
 
-#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_MAY_22_2009_09_41_02)
-#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_MAY_22_2009_09_41_02
+#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_WC_MAY_24_2009_18_48_54)
+#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_WC_MAY_24_2009_18_48_54
 
 #include <boost/detail/iterator.hpp>
 #include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
@@ -17,20 +17,22 @@
 namespace boost { namespace spirit { namespace lex { namespace lexertl { namespace static_ {
 
 // this table defines the names of the lexer states
-char const* const lexer_state_names[1] =
+char const* const lexer_state_names_wc[1] =
 {
     "INITIAL"
 };
 
 // this variable defines the number of lexer states
-std::size_t const lexer_state_count = 1;
+std::size_t const lexer_state_count_wc = 1;
 
+// this function returns the next matched token
 template<typename Iterator>
-std::size_t next_token (std::size_t &start_state_, Iterator const& start_,
+std::size_t next_token_wc (std::size_t &start_state_, Iterator const& start_,
     Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)
 {
     enum {end_state_index, id_index, unique_id_index, state_index, bol_index,
         eol_index, dead_state_index, dfa_offset};
+
     static const std::size_t npos = static_cast<std::size_t>(~0);
     static const std::size_t lookup_[256] = {
         8, 8, 8, 8, 8, 8, 8, 8,
@@ -117,6 +119,27 @@
     return id_;
 }
 
+// this defines a generic accessor for the information above
+struct lexer_wc
+{
+ static std::size_t const state_count()
+ {
+ return lexer_state_count_wc;
+ }
+
+ static char const* const state_name(std::size_t idx)
+ {
+ return lexer_state_names_wc[idx];
+ }
+
+ template<typename Iterator>
+ static std::size_t next(std::size_t &start_state_, Iterator const& start_
+ , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)
+ {
+ return next_token_wc(start_state_, start_, start_token_, end_, unique_id_);
+ }
+};
+
 }}}}} // namespace boost::spirit::lex::lexertl::static_
 
 #endif


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk