Boost logo

Boost-Commit:

Subject: [Boost-commit] svn:boost r57529 - in trunk/boost/spirit/home: lex/lexer/lexertl support/detail/lexer
From: hartmut.kaiser_at_[hidden]
Date: 2009-11-09 21:00:28


Author: hkaiser
Date: 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
New Revision: 57529
URL: http://svn.boost.org/trac/boost/changeset/57529

Log:
Spirit: implemented forward iterator support for lexer
Text files modified:
   trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp | 13 +
   trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp | 122 ++++++++++++++------
   trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp | 56 ++++++--
   trunk/boost/spirit/home/lex/lexer/lexertl/static_functor_data.hpp | 16 +-
   trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp | 11 +
   trunk/boost/spirit/home/support/detail/lexer/generate_cpp.hpp | 236 ++++++++++++++++++++++-----------------
   trunk/boost/spirit/home/support/detail/lexer/generator.hpp | 9 +
   trunk/boost/spirit/home/support/detail/lexer/input.hpp | 19 ++-
   8 files changed, 303 insertions(+), 179 deletions(-)

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -54,7 +54,8 @@
             data (IterData const& data_, Iterator& first, Iterator const& last)
               : first_(first), last_(last)
               , state_machine_(data_.state_machine_)
- , rules_(data_.rules_) {}
+ , rules_(data_.rules_)
+ , bol_(data_.state_machine_.data()._seen_BOL_assertion) {}
 
             // The following functions are used by the implementation of the
             // placeholder '_state'.
@@ -88,7 +89,7 @@
                 return it;
             }
 
- // The function more() is used by the implemention of the support
+ // The function more() is used by the implementation of the support
             // function lex::more(). Its functionality is equivalent to flex'
             // function yymore(): it tells the lexer that the next time it
             // matches a rule, the corresponding token should be appended onto
@@ -130,7 +131,7 @@
             std::size_t next(Iterator& end, std::size_t& unique_id)
             {
                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
- return tokenizer::next(state_machine_, first_, end, last_
+ return tokenizer::next(state_machine_, bol_, end, last_
                   , unique_id);
             }
 
@@ -162,6 +163,8 @@
             boost::lexer::basic_state_machine<char_type> const& state_machine_;
             boost::lexer::basic_rules<char_type> const& rules_;
 
+ bool bol_; // helper storing whether last character was \n
+
         private:
             // silence MSVC warning C4512: assignment operator could not be generated
             data& operator= (data const&);
@@ -223,7 +226,7 @@
             {
                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
                 return tokenizer::next(this->state_machine_, state_,
- this->get_first(), end, this->get_eoi(), unique_id);
+ this->bol_, end, this->get_eoi(), unique_id);
             }
 
             std::size_t& get_state() { return state_; }
@@ -290,7 +293,7 @@
                 return it;
             }
 
- // The function more() is used by the implemention of the support
+ // The function more() is used by the implementation of the support
             // function lex::more(). Its functionality is equivalent to flex'
             // function yymore(): it tells the lexer that the next time it
             // matches a rule, the corresponding token should be appended onto

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -89,7 +89,8 @@
     }
 
     inline bool
- generate_cpp_state_table (std::ostream &os_, char const* name_suffix = "")
+ generate_cpp_state_table (std::ostream &os_, char const* name_suffix
+ , bool bol, bool eol)
     {
         std::string suffix(name_suffix[0] ? "_" : "");
         suffix += name_suffix;
@@ -97,9 +98,13 @@
         generate_delimiter(os_);
         os_ << "// this defines a generic accessors for the information above\n";
         os_ << "struct lexer" << suffix << "\n{\n";
- os_ << " // version number of compatible static lexer engine\n";
- os_ << " enum { static_version = "
- << boost::lexical_cast<std::string>(SPIRIT_STATIC_LEXER_VERSION) << " };\n\n";
+ os_ << " // version number and feature-set of compatible static lexer engine\n";
+ os_ << " enum\n";
+ os_ << " {\n static_version = "
+ << boost::lexical_cast<std::string>(SPIRIT_STATIC_LEXER_VERSION) << ",\n";
+ os_ << " supports_bol = " << std::boolalpha << bol << ",\n";
+ os_ << " supports_eol = " << std::boolalpha << eol << "\n";
+ os_ << " };\n\n";
         os_ << " // return the number of lexer states\n";
         os_ << " static std::size_t const state_count()\n";
         os_ << " {\n return lexer_state_count" << suffix << "; \n }\n\n";
@@ -108,10 +113,10 @@
         os_ << " {\n return lexer_state_names" << suffix << "[idx]; \n }\n\n";
         os_ << " // return the next matched token\n";
         os_ << " template<typename Iterator>\n";
- os_ << " static std::size_t next(std::size_t &start_state_, Iterator const& start_\n";
+ os_ << " static std::size_t next(std::size_t &start_state_, bool& bol_\n";
         os_ << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n";
         os_ << " {\n return next_token" << suffix
- << "(start_state_, start_, start_token_, end_, unique_id_);\n }\n";
+ << "(start_state_, bol_, start_token_, end_, unique_id_);\n }\n";
         os_ << "};\n\n";
         return os_.good();
     }
@@ -184,11 +189,11 @@
 
         if (sm_.data()._seen_BOL_assertion)
         {
- os_ << "Iterator const& start_, ";
+ os_ << "bool& bol_, ";
         }
         else if (!optimize_parameters)
         {
- os_ << "Iterator const& /*start_*/, ";
+ os_ << "bool& /*bol_*/, ";
         }
 
         if (dfas_ > 1 || sm_.data()._seen_BOL_assertion || !optimize_parameters)
@@ -391,7 +396,15 @@
             os_ << " };\n";
         }
 
- os_ << "\n if (start_token_ == end_) return 0;\n\n";
+ os_ << "\n if (start_token_ == end_)\n";
+ os_ << " {\n";
+ os_ << " unique_id_ = boost::lexer::npos;\n";
+ os_ << " return 0;\n";
+ os_ << " }\n\n";
+ if (sm_.data()._seen_BOL_assertion)
+ {
+ os_ << " bool bol = bol_;\n\n";
+ }
 
         if (dfas_ > 1)
         {
@@ -401,11 +414,19 @@
             os_ << " const std::size_t *dfa_ = dfa_arr_[start_state_];\n";
         }
 
- os_ << " const std::size_t *ptr_ = dfa_ + dfa_alphabet_;\n";
+ os_ << " std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n";
         os_ << " Iterator curr_ = start_token_;\n";
         os_ << " bool end_state_ = *ptr_ != 0;\n";
         os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
         os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
+ if (dfas_ > 1)
+ {
+ os_ << " std::size_t end_start_state_ = start_state_;\n";
+ }
+ if (sm_.data()._seen_BOL_assertion)
+ {
+ os_ << " bool end_bol_ = bol_;\n";
+ }
         os_ << " Iterator end_token_ = start_token_;\n\n";
 
         os_ << " while (curr_ != end_)\n";
@@ -423,8 +444,7 @@
 
         if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
         {
- os_ << " if (BOL_state_ && (start_token_ == start_ ||\n";
- os_ << " *(start_token_ - 1) == '\\n'))\n";
+ os_ << " if (BOL_state_ && bol)\n";
             os_ << " {\n";
             os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
             os_ << " }\n";
@@ -434,17 +454,18 @@
             os_ << " }\n";
             os_ << " else\n";
             os_ << " {\n";
- os_ << " std::size_t const state_ =\n";
-
             if (lookups_ == 256)
             {
- os_ << " ptr_[lookup_[<typename Traits::index_type>"
- "(*curr_++)]];\n";
+ os_ << " unsigned char index = \n";
+ os_ << " static_cast<unsigned char>(*curr_++);\n";
             }
             else
             {
- os_ << " ptr_[lookup_[*curr_++]];\n";
+ os_ << " std::size_t index = *curr_++\n";
             }
+ os_ << " bol = (index == '\n') ? true : false;\n";
+ os_ << " std::size_t const state_ = ptr_[\n";
+ os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
 
             os_ << '\n';
             os_ << " if (state_ == 0) break;\n";
@@ -454,24 +475,24 @@
         }
         else if (sm_.data()._seen_BOL_assertion)
         {
- os_ << " if (BOL_state_ && (start_token_ == start_ ||\n";
- os_ << " *(start_token_ - 1) == '\\n'))\n";
+ os_ << " if (BOL_state_ && bol)\n";
             os_ << " {\n";
             os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
             os_ << " }\n";
             os_ << " else\n";
             os_ << " {\n";
- os_ << " std::size_t const state_ =\n";
-
             if (lookups_ == 256)
             {
- os_ << " ptr_[lookup_[static_cast<unsigned char>"
- "(*curr_++)]];\n";
+ os_ << " unsigned char index = \n";
+ os_ << " static_cast<unsigned char>(*curr_++);\n";
             }
             else
             {
- os_ << " ptr_[lookup_[*curr_++]];\n";
+ os_ << " std::size_t index = *curr_++\n";
             }
+ os_ << " bol = (index == '\n') ? true : false;\n";
+ os_ << " std::size_t const state_ = ptr_[\n";
+ os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
 
             os_ << '\n';
             os_ << " if (state_ == 0) break;\n";
@@ -487,17 +508,18 @@
             os_ << " }\n";
             os_ << " else\n";
             os_ << " {\n";
- os_ << " std::size_t const state_ =\n";
-
             if (lookups_ == 256)
             {
- os_ << " ptr_[lookup_[static_cast<unsigned char>"
- "(*curr_++)]];\n";
+ os_ << " unsigned char index = \n";
+ os_ << " static_cast<unsigned char>(*curr_++);\n";
             }
             else
             {
- os_ << " ptr_[lookup_[*curr_++]];\n";
+ os_ << " std::size_t index = *curr_++\n";
             }
+ os_ << " bol = (index == '\n') ? true : false;\n";
+ os_ << " std::size_t const state_ = ptr_[\n";
+ os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
 
             os_ << '\n';
             os_ << " if (state_ == 0) break;\n";
@@ -530,12 +552,14 @@
         os_ << " end_state_ = true;\n";
         os_ << " id_ = *(ptr_ + id_index);\n";
         os_ << " uid_ = *(ptr_ + unique_id_index);\n";
-
         if (dfas_ > 1)
         {
- os_ << " start_state_ = *(ptr_ + state_index);\n";
+ os_ << " end_start_state_ = *(ptr_ + state_index);\n";
+ }
+ if (sm_.data()._seen_BOL_assertion)
+ {
+ os_ << " end_bol_ = bol;\n";
         }
-
         os_ << " end_token_ = curr_;\n";
         os_ << " }\n";
         os_ << " }\n\n";
@@ -553,12 +577,14 @@
             os_ << " end_state_ = true;\n";
             os_ << " id_ = *(ptr_ + id_index);\n";
             os_ << " uid_ = *(ptr_ + unique_id_index);\n";
-
             if (dfas_ > 1)
             {
- os_ << " start_state_ = *(ptr_ + state_index);\n";
+ os_ << " end_start_state_ = *(ptr_ + state_index);\n";
+ }
+ if (sm_.data()._seen_BOL_assertion)
+ {
+ os_ << " end_bol_ = bol;\n";
             }
-
             os_ << " end_token_ = curr_;\n";
             os_ << " }\n";
             os_ << " }\n\n";
@@ -567,17 +593,36 @@
         os_ << " if (end_state_)\n";
         os_ << " {\n";
         os_ << " // return longest match\n";
+ os_ << " start_state_ = end_start_state_;\n";
         os_ << " start_token_ = end_token_;\n";
 
         if (dfas_ > 1)
         {
- os_ << " if (id_ == 0) goto again;\n";
+ os_ << " if (id_ == 0)\n";
+ os_ << " {\n";
+ if (sm_.data()._seen_BOL_assertion)
+ {
+ os_ << " bol_ = end_bol_;\n";
+ }
+ os_ << " goto again;\n";
+ os_ << " }\n";
+ if (sm_.data()._seen_BOL_assertion)
+ {
+ os_ << " else\n";
+ os_ << " {\n";
+ os_ << " bol_ = end_bol_;\n";
+ os_ << " }\n";
+ }
         }
 
         os_ << " }\n";
         os_ << " else\n";
         os_ << " {\n";
 
+ if (sm_.data()._seen_BOL_assertion)
+ {
+ os_ << " bol_ = (*start_token_ == '\n') ? true : false;\n";
+ }
         if (skip_on_nomatch)
         {
             os_ << " // No match causes char to be skipped\n";
@@ -592,8 +637,11 @@
         os_ << " return id_;\n";
         os_ << "}\n\n";
 
- if (!generate_cpp_state_table(os_, name_suffix))
+ if (!generate_cpp_state_table(os_, name_suffix
+ , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion))
+ {
             return false;
+ }
 
         os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n";
 

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -30,9 +30,8 @@
 
         static std::size_t next (
             boost::lexer::basic_state_machine<char_type> const& state_machine_
- , std::size_t &dfa_state_, Iterator const& start_
- , Iterator &start_token_, Iterator const& end_
- , std::size_t& unique_id_)
+ , std::size_t &dfa_state_, bool& bol_, Iterator &start_token_
+ , Iterator const& end_, std::size_t& unique_id_)
         {
             if (start_token_ == end_)
             {
@@ -40,16 +39,21 @@
                 return 0;
             }
 
+ bool bol = bol_;
+
         again:
             std::size_t const* lookup_ = &state_machine_.data()._lookup[dfa_state_]->
                 front ();
             std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[dfa_state_];
             std::size_t const* dfa_ = &state_machine_.data()._dfa[dfa_state_]->front ();
+
             std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
             Iterator curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + boost::lexer::id_index);
             std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
+ std::size_t end_start_state_ = dfa_state_;
+ bool end_bol_ = bol_;
             Iterator end_token_ = start_token_;
 
             while (curr_ != end_)
@@ -57,8 +61,7 @@
                 std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
                 std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
 
- if (BOL_state_ && (start_token_ == start_ ||
- *(start_token_ - 1) == '\n'))
+ if (BOL_state_ && bol)
                 {
                     ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
                 }
@@ -77,6 +80,7 @@
 
                     index_type index =
                         boost::lexer::char_traits<value_type>::call(*curr_++);
+ bol = (index == '\n') ? true : false;
                     std::size_t const state_ = ptr_[
                         lookup_[static_cast<std::size_t>(index)]];
 
@@ -93,7 +97,8 @@
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
                     uid_ = *(ptr_ + boost::lexer::unique_id_index);
- dfa_state_ = *(ptr_ + boost::lexer::state_index);
+ end_start_state_ = *(ptr_ + boost::lexer::state_index);
+ end_bol_ = bol;
                     end_token_ = curr_;
                 }
             }
@@ -109,19 +114,29 @@
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
                     uid_ = *(ptr_ + boost::lexer::unique_id_index);
- dfa_state_ = *(ptr_ + boost::lexer::state_index);
+ end_start_state_ = *(ptr_ + boost::lexer::state_index);
+ end_bol_ = bol;
                     end_token_ = curr_;
                 }
             }
 
             if (end_state_) {
                 // return longest match
+ dfa_state_ = end_start_state_;
                 start_token_ = end_token_;
 
- if (id_ == 0)
+ if (id_ == 0)
+ {
+ bol = end_bol_;
                     goto again;
+ }
+ else
+ {
+ bol_ = end_bol_;
+ }
             }
             else {
+ bol_ = (*start_token_ == '\n') ? true : false;
                 id_ = boost::lexer::npos;
                 uid_ = boost::lexer::npos;
             }
@@ -133,19 +148,26 @@
         ///////////////////////////////////////////////////////////////////////
         static std::size_t next (
             boost::lexer::basic_state_machine<char_type> const& state_machine_
- , Iterator const& start_, Iterator &start_token_, Iterator const& end_
+ , bool& bol_, Iterator &start_token_, Iterator const& end_
           , std::size_t& unique_id_)
         {
- if (start_token_ == end_) return 0;
+ if (start_token_ == end_)
+ {
+ unique_id_ = boost::lexer::npos;
+ return 0;
+ }
 
+ bool bol = bol_;
             std::size_t const* lookup_ = &state_machine_.data()._lookup[0]->front();
             std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[0];
             std::size_t const* dfa_ = &state_machine_.data()._dfa[0]->front ();
             std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
+
             Iterator curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + boost::lexer::id_index);
             std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
+ bool end_bol_ = bol_;
             Iterator end_token_ = start_token_;
 
             while (curr_ != end_)
@@ -153,8 +175,7 @@
                 std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
                 std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
 
- if (BOL_state_ && (start_token_ == start_ ||
- *(start_token_ - 1) == '\n'))
+ if (BOL_state_ && bol)
                 {
                     ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
                 }
@@ -170,9 +191,10 @@
                     typedef typename
                         boost::lexer::char_traits<value_type>::index_type
                     index_type;
-
+
                     index_type index =
                         boost::lexer::char_traits<value_type>::call(*curr_++);
+ bol = (index == '\n') ? true : false;
                     std::size_t const state_ = ptr_[
                         lookup_[static_cast<std::size_t>(index)]];
 
@@ -189,6 +211,7 @@
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
                     uid_ = *(ptr_ + boost::lexer::unique_id_index);
+ end_bol_ = bol;
                     end_token_ = curr_;
                 }
             }
@@ -204,15 +227,18 @@
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
                     uid_ = *(ptr_ + boost::lexer::unique_id_index);
+ end_bol_ = bol;
                     end_token_ = curr_;
                 }
             }
 
             if (end_state_) {
                 // return longest match
+ bol_ = end_bol_;
                 start_token_ = end_token_;
             }
             else {
+ bol_ = *start_token_ == '\n';
                 id_ = boost::lexer::npos;
                 uid_ = boost::lexer::npos;
             }
@@ -222,10 +248,6 @@
         }
     };
 
- ///////////////////////////////////////////////////////////////////////////
- typedef basic_iterator_tokeniser<char const *> tokeniser;
- typedef basic_iterator_tokeniser<wchar_t const *> wtokeniser;
-
 }}}}
 
 #endif

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/static_functor_data.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/static_functor_data.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/static_functor_data.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -61,7 +61,8 @@
               , std::size_t> wrap_action_type;
  
             typedef std::size_t (*next_token_functor)(std::size_t&,
- Iterator const&, Iterator&, Iterator const&, std::size_t&);
+as Iterator const&, Iterator&, Iterator const&, std::size_t&);
+ bool&, Iterator&, Iterator const&, std::size_t&);
             typedef char_type const* (*get_state_name_type)(std::size_t);
 
             // initialize the shared data
@@ -70,7 +71,8 @@
                   , Iterator const& last)
               : first_(first), last_(last)
               , next_token_(data.next_)
- , get_state_name_(data.get_state_name_){}
+ , get_state_name_(data.get_state_name_)
+ , bol_(data.bol_) {}
 
             // The following functions are used by the implementation of the
             // placeholder '_state'.
@@ -107,7 +109,7 @@
                 return it;
             }
 
- // The function more() is used by the implemention of the support
+ // The function more() is used by the implementation of the support
             // function lex::more(). Its functionality is equivalent to flex'
             // function yymore(): it tells the lexer that the next time it
             // matches a rule, the corresponding token should be appended onto
@@ -149,7 +151,7 @@
             std::size_t next(Iterator& end, std::size_t& unique_id)
             {
                 std::size_t state;
- return next_token_(state, first_, end, last_, unique_id);
+ return next_token_(state, bol_, end, last_, unique_id);
             }
 
             // nothing to invoke, so this is empty
@@ -180,6 +182,8 @@
             next_token_functor next_token_;
             get_state_name_type get_state_name_;
 
+ bool bol_;
+
         private:
             // silence MSVC warning C4512: assignment operator could not be generated
             static_data& operator= (static_data const&);
@@ -242,7 +246,7 @@
             // underlying input sequence.
             std::size_t next(Iterator& end, std::size_t& unique_id)
             {
- return this->next_token_(state_, this->first_, end, this->last_
+ return this->next_token_(state_, this->bol_, end, this->last_
                   , unique_id);
             }
 
@@ -312,7 +316,7 @@
                 return it;
             }
 
- // The function more() is used by the implemention of the support
+ // The function more() is used by the implementation of the support
             // function lex::more(). Its functionality is equivalent to flex'
             // function yymore(): it tells the lexer that the next time it
             // matches a rule, the corresponding token should be appended onto

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -139,15 +139,17 @@
 
             iterator_data_type(next_token_functor next
               , semantic_actions_type const& actions
- , get_state_name_type get_state_name, std::size_t num_states)
+ , get_state_name_type get_state_name, std::size_t num_states
+ , bool bol)
               : next_(next), actions_(actions), get_state_name_(get_state_name)
- , num_states_(num_states)
+ , num_states_(num_states), bol_(bol)
             {}
 
             next_token_functor next_;
             semantic_actions_type const& actions_;
             get_state_name_type get_state_name_;
             std::size_t num_states_;
+ bool bol_;
 
         private:
             // silence MSVC warning C4512: assignment operator could not be generated
@@ -173,8 +175,9 @@
           , char_type const* initial_state = 0) const
         {
             iterator_data_type iterator_data(
- &tables_type::template next<Iterator_>, actions_,
- &tables_type::state_name, tables_type::state_count()
+ &tables_type::template next<Iterator_>, actions_
+ , &tables_type::state_name, tables_type::state_count()
+ , tables_type::supports_bol
                 );
             return iterator_type(iterator_data, first, last, initial_state);
         }

Modified: trunk/boost/spirit/home/support/detail/lexer/generate_cpp.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/generate_cpp.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/generate_cpp.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -79,7 +79,7 @@
         upper_name_.begin (), ::toupper);
     os_ << "#ifndef " << upper_name_ + '\n';
     os_ << "#define " << upper_name_ + '\n';
- os_ << "// Copyright (c) 2008 Ben Hanson\n";
+ os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
     os_ << "//\n";
     os_ << "// Distributed under the Boost Software License, "
         "Version 1.0. (See accompanying\n";
@@ -94,25 +94,6 @@
         os_ << "std::size_t &start_state_, ";
     }
 
- if (sm_._seen_BOL_assertion || !optimise_parameters_)
- {
- if (use_pointers_)
- {
- os_ << iterator_ << " const ";
- }
- else
- {
- os_ << "const " << iterator_;
- }
-
- os_ << "start_, ";
- }
-
- if (dfas_ > 1 || sm_._seen_BOL_assertion || !optimise_parameters_)
- {
- os_ << "\n ";
- }
-
     if (use_pointers_)
     {
         os_ << iterator_ << " &";
@@ -133,11 +114,18 @@
         os_ << "const " << iterator_;
     }
 
- os_ << "end_)\n";
+ os_ << "end_, \n";
+ os_ << " std::size_t &unique_id_";
+
+ if (sm_._seen_BOL_assertion || !optimise_parameters_)
+ {
+ os_ << ", bool &beg_of_line_";
+ }
+
+ os_ << ")\n";
     os_ << "{\n";
- os_ << " enum {end_state_index, id_index, state_index, bol_index, "
- "eol_index,\n";
- os_ << " dead_state_index, dfa_offset};\n";
+ os_ << " enum {end_state_index, id_index, unique_id_index, state_index, bol_index,\n";
+ os_ << " eol_index, dead_state_index, dfa_offset};\n";
     os_ << " static const std::size_t npos = static_cast"
         "<std::size_t>(~0);\n";
 
@@ -330,7 +318,11 @@
         os_ << "};\n";
     }
 
- os_ << "\n if (start_token_ == end_) return 0;\n\n";
+ os_ << "\n if (start_token_ == end_)\n";
+ os_ << " {\n";
+ os_ << " unique_id_ = npos;\n";
+ os_ << " return 0;\n";
+ os_ << " }\n\n";
 
     if (dfas_ > 1)
     {
@@ -346,6 +338,19 @@
     os_ << " Iterator curr_ = start_token_;\n";
     os_ << " bool end_state_ = *ptr_ != 0;\n";
     os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
+ os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
+
+ if (dfas_ > 1)
+ {
+ os_ << " std::size_t end_start_state_ = start_state_;\n";
+ }
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << " bool bol_ = beg_of_line_;\n";
+ os_ << " bool end_bol_ = bol_;\n";
+ }
+
     os_ << " Iterator end_token_ = start_token_;\n";
     os_ << '\n';
     os_ << " while (curr_ != end_)\n";
@@ -366,108 +371,89 @@
         os_ << '\n';
     }
 
- if (sm_._seen_BOL_assertion && sm_._seen_EOL_assertion)
+ if (sm_._seen_BOL_assertion)
     {
- os_ << " if (BOL_state_ && (start_token_ == start_ ||\n";
- os_ << " *(start_token_ - 1) == '\\n'))\n";
+ os_ << " if (BOL_state_ && bol_)\n";
         os_ << " {\n";
         os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
         os_ << " }\n";
- os_ << " else if (EOL_state_ && *curr_ == '\\n')\n";
- os_ << " {\n";
- os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
- os_ << " }\n";
- os_ << " else\n";
- os_ << " {\n";
- os_ << " const std::size_t state_ =\n";
+ }
 
- if (lookups_ == 256)
- {
- os_ << " ptr_[lookup_[static_cast<unsigned char>\n";
- os_ << " (*curr_++)]];\n";
- }
- else
+ if (sm_._seen_EOL_assertion)
+ {
+ os_ << " ";
+
+ if (sm_._seen_BOL_assertion)
         {
- os_ << " ptr_[lookup_[*curr_++]];\n";
+ os_ << "else ";
         }
 
- os_ << '\n';
- os_ << " if (state_ == 0) break;\n";
- os_ << '\n';
- os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+ os_ << "if (EOL_state_ && *curr_ == '\\n')\n";
+ os_ << " {\n";
+ os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
         os_ << " }\n";
     }
- else if (sm_._seen_BOL_assertion)
+
+ std::string tab_ (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion ? " " : "");
+
+ if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
     {
- os_ << " if (BOL_state_ && (start_token_ == start_ ||\n";
- os_ << " *(start_token_ - 1) == '\\n'))\n";
- os_ << " {\n";
- os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
- os_ << " }\n";
         os_ << " else\n";
         os_ << " {\n";
- os_ << " const std::size_t state_ =\n";
+ }
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << " ";
 
         if (lookups_ == 256)
         {
- os_ << " ptr_[lookup_[static_cast<unsigned char>\n";
- os_ << " (*curr_++)]];\n";
+ os_ << "char";
         }
         else
         {
- os_ << " ptr_[lookup_[*curr_++]];\n";
+ os_ << "wchar_t";
         }
 
- os_ << '\n';
- os_ << " if (state_ == 0) break;\n";
- os_ << '\n';
- os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
- os_ << " }\n";
+ os_ << " prev_char_ = *curr_++;\n\n";
+ os_ << " bol_ = prev_char_ == '\\n';\n\n";
     }
- else if (sm_._seen_EOL_assertion)
- {
- os_ << " if (EOL_state_ && *curr_ == '\\n')\n";
- os_ << " {\n";
- os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
- os_ << " }\n";
- os_ << " else\n";
- os_ << " {\n";
- os_ << " const std::size_t state_ =\n";
 
- if (lookups_ == 256)
- {
- os_ << " ptr_[lookup_[static_cast<unsigned char>\n";
- os_ << " (*curr_++)]];\n";
- }
- else
- {
- os_ << " ptr_[lookup_[*curr_++]];\n";
- }
+ os_ << tab_;
+ os_ << " const std::size_t state_ =\n";
+ os_ << tab_;
+ os_ << " ptr_[lookup_[";
 
- os_ << '\n';
- os_ << " if (state_ == 0) break;\n";
- os_ << '\n';
- os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
- os_ << " }\n";
+ if (lookups_ == 256)
+ {
+ os_ << "static_cast<unsigned char>(";
+ }
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << "prev_char";
     }
     else
     {
- os_ << " const std::size_t state_ =\n";
+ os_ << "*curr_++";
+ }
 
- if (lookups_ == 256)
- {
- os_ << " ptr_[lookup_[static_cast<unsigned char>\n";
- os_ << " (*curr_++)]];\n";
- }
- else
- {
- os_ << " ptr_[lookup_[*curr_++]];\n";
- }
 
- os_ << '\n';
- os_ << " if (state_ == 0) break;\n";
- os_ << '\n';
- os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+ if (lookups_ == 256)
+ {
+ os_ << ')';
+ }
+
+ os_ << "]];\n\n";
+
+ os_ << tab_;
+ os_ << " if (state_ == 0) break;\n\n";
+ os_ << tab_;
+ os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+
+ if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
+ {
+ os_ << " }\n";
     }
 
     os_ << '\n';
@@ -475,10 +461,16 @@
     os_ << " {\n";
     os_ << " end_state_ = true;\n";
     os_ << " id_ = *(ptr_ + id_index);\n";
+ os_ << " uid_ = *(ptr_ + unique_id_index);\n";
 
     if (dfas_ > 1)
     {
- os_ << " start_state_ = *(ptr_ + state_index);\n";
+ os_ << " end_start_state_ = *(ptr_ + state_index);\n";
+ }
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << " end_bol_ = bol_;\n";
     }
 
     os_ << " end_token_ = curr_;\n";
@@ -498,10 +490,16 @@
         os_ << " {\n";
         os_ << " end_state_ = true;\n";
         os_ << " id_ = *(ptr_ + id_index);\n";
+ os_ << " uid_ = *(ptr_ + unique_id_index);\n";
 
         if (dfas_ > 1)
         {
- os_ << " start_state_ = *(ptr_ + state_index);\n";
+ os_ << " end_start_state_ = *(ptr_ + state_index);\n";
+ }
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << " end_bol_ = bol_;\n";
         }
 
         os_ << " end_token_ = curr_;\n";
@@ -513,12 +511,40 @@
     os_ << " if (end_state_)\n";
     os_ << " {\n";
     os_ << " // return longest match\n";
+
+ if (dfas_ > 1)
+ {
+ os_ << " start_state_ = end_start_state_;\n";
+ }
+
+ if (sm_._seen_BOL_assertion && dfas_ < 2)
+ {
+ os_ << " beg_of_line_ = end_bol_;\n";
+ }
+
     os_ << " start_token_ = end_token_;\n";
 
     if (dfas_ > 1)
     {
         os_ << '\n';
- os_ << " if (id_ == 0) goto again;\n";
+ os_ << " if (id_ == 0)\n";
+ os_ << " {\n";
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << " bol_ = end_bol_;\n";
+ }
+
+ os_ << " goto again;\n";
+ os_ << " }\n";
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << " else\n";
+ os_ << " {\n";
+ os_ << " beg_of_line_ = end_bol_;\n";
+ os_ << " }\n";
+ }
     }
 
     os_ << " }\n";
@@ -528,12 +554,20 @@
     if (skip_unknown_)
     {
         os_ << " // No match causes char to be skipped\n";
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << " beg_of_line_ = *start_token_ == '\\n';\n";
+ }
+
         os_ << " ++start_token_;\n";
     }
 
     os_ << " id_ = npos;\n";
+ os_ << " uid_ = npos;\n";
     os_ << " }\n";
     os_ << '\n';
+ os_ << " unique_id_ = uid_;\n";
     os_ << " return id_;\n";
     os_ << "}\n";
     os_ << "\n#endif\n";

Modified: trunk/boost/spirit/home/support/detail/lexer/generator.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/generator.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/generator.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -285,7 +285,8 @@
                 locale_, node_ptr_vector_, macromap_, token_map_,
                 seen_BOL_assertion_, seen_EOL_assertion_);
             macro_iter_pair map_iter_ = macromap_.
- insert (macro_pair (name_, (detail::node const*)0));
+ insert (macro_pair (name_, static_cast<const detail::node *>
+ (0)));
 
             map_iter_.first->second = node_;
         }
@@ -511,7 +512,8 @@
                     }
                     else
                     {
- iter_ = lhs_->insert (++iter_, (charset*)0);
+ iter_ = lhs_->insert (++iter_,
+ static_cast<charset *>(0));
                         *iter_ = overlap_.release ();
 
                         // VC++ 6 Hack:
@@ -653,7 +655,8 @@
                     }
                     else
                     {
- iter_ = lhs_->insert (++iter_, (equivset*)0);
+ iter_ = lhs_->insert (++iter_,
+ static_cast<equivset *>(0));
                         *iter_ = overlap_.release ();
 
                         // VC++ 6 Hack:

Modified: trunk/boost/spirit/home/support/detail/lexer/input.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/input.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/input.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -172,6 +172,7 @@
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + id_index);
             std::size_t uid_ = *(ptr_ + unique_id_index);
+ std::size_t end_start_state_ = start_state_;
             bool end_bol_ = bol_;
             FwdIter end_token_ = start_token_;
 
@@ -211,7 +212,7 @@
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
                     uid_ = *(ptr_ + unique_id_index);
- start_state_ = *(ptr_ + state_index);
+ end_start_state_ = *(ptr_ + state_index);
                     end_bol_ = bol_;
                     end_token_ = curr_;
                 }
@@ -228,7 +229,7 @@
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
                     uid_ = *(ptr_ + unique_id_index);
- start_state_ = *(ptr_ + state_index);
+ end_start_state_ = *(ptr_ + state_index);
                     end_bol_ = bol_;
                     end_token_ = curr_;
                 }
@@ -237,14 +238,18 @@
             if (end_state_)
             {
                 // return longest match
- _data.bol = end_bol_;
+ start_state_ = end_start_state_;
                 start_token_ = end_token_;
 
                 if (id_ == 0)
                 {
- bol_ = _data.bol;
+ bol_ = end_bol_;
                     goto again;
                 }
+ else
+ {
+ _data.bol = end_bol_;
+ }
             }
             else
             {
@@ -279,6 +284,7 @@
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + id_index);
             std::size_t uid_ = *(ptr_ + unique_id_index);
+ std::size_t end_start_state_ = start_state_;
             FwdIter end_token_ = start_token_;
 
             while (curr_ != end_)
@@ -298,7 +304,7 @@
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
                     uid_ = *(ptr_ + unique_id_index);
- start_state_ = *(ptr_ + state_index);
+ end_start_state_ = *(ptr_ + state_index);
                     end_token_ = curr_;
                 }
             }
@@ -306,6 +312,7 @@
             if (end_state_)
             {
                 // return longest match
+ start_state_ = end_start_state_;
                 start_token_ = end_token_;
 
                 if (id_ == 0) goto again;
@@ -401,8 +408,8 @@
             if (end_state_)
             {
                 // return longest match
- start_token_ = end_token_;
                 _data.bol = end_bol_;
+ start_token_ = end_token_;
             }
             else
             {


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk