Boost logo

Boost-Commit:

Subject: [Boost-commit] svn:boost r57582 - in trunk/boost/spirit/home: lex/lexer/lexertl support/detail/lexer
From: hartmut.kaiser_at_[hidden]
Date: 2009-11-11 16:29:22


Author: hkaiser
Date: 2009-11-11 16:29:20 EST (Wed, 11 Nov 2009)
New Revision: 57582
URL: http://svn.boost.org/trac/boost/changeset/57582

Log:
Spirit: updating static lexer support
Text files modified:
   trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp | 117 ++++++++-------------------------------
   trunk/boost/spirit/home/support/detail/lexer/debug.hpp | 70 -----------------------
   trunk/boost/spirit/home/support/detail/lexer/generate_re2c.hpp | 103 ++++++++++------------------------
   trunk/boost/spirit/home/support/detail/lexer/string_token.hpp | 72 ++++++++++++++++++++++++
   4 files changed, 130 insertions(+), 232 deletions(-)

Modified: trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp 2009-11-11 16:29:20 EST (Wed, 11 Nov 2009)
@@ -16,11 +16,8 @@
 #include <boost/spirit/home/support/detail/lexer/rules.hpp>
 #include <boost/spirit/home/support/detail/lexer/size_t.hpp>
 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
+#include <boost/spirit/home/support/detail/lexer/debug.hpp>
 #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
-#include <boost/spirit/home/karma/numeric/uint.hpp>
-#include <boost/spirit/home/karma/string/lit.hpp>
-#include <boost/spirit/home/karma/operator/sequence.hpp>
-#include <boost/spirit/home/karma/generate.hpp>
 #include <boost/algorithm/string.hpp>
 #include <boost/lexical_cast.hpp>
 
@@ -536,61 +533,14 @@
     }
 
     ///////////////////////////////////////////////////////////////////////////
- inline std::string get_charcode(char ch)
+ template <typename Char>
+ inline std::string get_charlit(Char ch)
     {
- std::string result;
- switch(ch) {
- case '\t':
- result = "\\t";
- break;
- case '\b':
- result = "\\b";
- break;
- case '\r':
- result = "\\r";
- break;
- case '\n':
- result = "\\n";
- break;
- case '\f':
- result = "\\f";
- break;
- case '\v':
- result = "\\v";
- break;
- case '\\':
- result = "\\\\";
- break;
- case '\'':
- result = "\\'";
- break;
- default:
- if (std::isprint(ch))
- {
- result = ch;
- }
- else
- {
- typedef karma::uint_generator<unsigned char, 16> uintgen_type;
- std::back_insert_iterator<std::string> sink(result);
- karma::generate(sink, "\\x" << uintgen_type()(ch));
- }
- break;
- }
+ std::basic_string<Char> result;
+ boost::lexer::basic_string_token<Char>::escape_char (ch, result);
         return result;
     }
 
- inline std::string get_charcode(wchar_t ch)
- {
- if (ch & ~0xff) {
- BOOST_ASSERT(false); // not implemented yet
-
- std::string result;
- return result;
- }
- return get_charcode(static_cast<char>(ch & 0xff));
- }
-
     ///////////////////////////////////////////////////////////////////////////
     template <typename Char>
     bool generate_function_body_switch(std::ostream & os_
@@ -657,7 +607,7 @@
         }
 
         os_ << " " << ((lookups_ == 256) ? "char" : "wchar_t")
- << " ch_ = 0;\n\n";
+ << " ch_ = 0;\n";
         for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
         {
             std::size_t const states_ = iter_->states;
@@ -667,10 +617,8 @@
                 std::size_t const transitions_ = iter_->transitions;
                 std::size_t t_ = 0;
 
- if (dfas_ > 1 || dfa_ != 0 || state_ != 0)
- {
- os_ << "state" << dfa_ << '_' << state_ << ":\n";
- }
+ os_ << "\nstate" << dfa_ << '_' << state_ << ":\n";
+
                 if (iter_->end_state)
                 {
                     os_ << " end_state_ = true;\n";
@@ -688,6 +636,8 @@
                     {
                         os_ << " end_bol_ = bol;\n";
                     }
+
+ if (transitions_) os_ << '\n';
                 }
 
                 if (t_ < transitions_ ||
@@ -736,43 +686,28 @@
                         {
                             if (!first_char_)
                             {
- if (iter_->token._negated)
- {
- os_ << " && ";
- }
- else
- {
- os_ << " || ";
- }
+ os_ << ((iter_->token._negated) ? " && " : " || ");
+ }
+ else
+ {
+ first_char_ = false;
                             }
-
- first_char_ = false;
-
                             if (range_)
                             {
                                 if (iter_->token._negated)
                                 {
                                     os_ << "!";
                                 }
-
- os_ << "(ch_ >= '" << get_charcode(start_char_);
- os_ << "' && ch_ <= '" << get_charcode(curr_char_) << "')";
+ os_ << "(ch_ >= '" << get_charlit(start_char_)
+ << "' && ch_ <= '"
+ << get_charlit(curr_char_) << "')";
                                 range_ = false;
                             }
                             else
                             {
- os_ << "ch_ ";
-
- if (iter_->token._negated)
- {
- os_ << "!=";
- }
- else
- {
- os_ << "==";
- }
-
- os_ << " '" << get_charcode(curr_char_) << "'";
+ os_ << "ch_ "
+ << ((iter_->token._negated) ? "!=" : "==")
+ << " '" << get_charlit(curr_char_) << "'";
                             }
                         }
                     }
@@ -782,16 +717,16 @@
                     ++iter_;
                 }
 
- if (transitions_) os_ << '\n';
-
- os_ << " goto end;\n";
- os_ << '\n';
+ if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
+ {
+ os_ << " goto end;\n";
+ }
 
                 if (transitions_ == 0) ++iter_;
             }
         }
 
- os_ << "end:\n";
+ os_ << "\nend:\n";
         os_ << " if (end_state_)\n";
         os_ << " {\n";
         os_ << " // return longest match\n";

Modified: trunk/boost/spirit/home/support/detail/lexer/debug.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/debug.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/debug.hpp 2009-11-11 16:29:20 EST (Wed, 11 Nov 2009)
@@ -39,75 +39,7 @@
 
         while (size_)
         {
- switch (*ptr_)
- {
- case '\0':
- out_ += '\\';
- out_ += '0';
- break;
- case '\a':
- out_ += '\\';
- out_ += 'a';
- break;
- case '\b':
- out_ += '\\';
- out_ += 'b';
- break;
- case 27:
- out_ += '\\';
- out_ += 'x';
- out_ += '1';
- out_ += 'b';
- break;
- case '\f':
- out_ += '\\';
- out_ += 'f';
- break;
- case '\n':
- out_ += '\\';
- out_ += 'n';
- break;
- case '\r':
- out_ += '\\';
- out_ += 'r';
- break;
- case '\t':
- out_ += '\\';
- out_ += 't';
- break;
- case '\v':
- out_ += '\\';
- out_ += 'v';
- break;
- case '\\':
- out_ += '\\';
- out_ += '\\';
- break;
- case '"':
- out_ += '\\';
- out_ += '"';
- break;
- default:
- {
- if (*ptr_ < 32 && *ptr_ >= 0)
- {
- stringstream ss_;
-
- out_ += '\\';
- out_ += 'x';
- ss_ << std::hex <<
- static_cast<std::size_t> (*ptr_);
- out_ += ss_.str ();
- }
- else
- {
- out_ += *ptr_;
- }
-
- break;
- }
- }
-
+ basic_string_token<CharT>::escape_char (*ptr_, out_);
             ++ptr_;
             --size_;
         }

Modified: trunk/boost/spirit/home/support/detail/lexer/generate_re2c.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/generate_re2c.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/generate_re2c.hpp 2009-11-11 16:29:20 EST (Wed, 11 Nov 2009)
@@ -19,66 +19,13 @@
 {
 namespace lexer
 {
-inline std::string get_charcode(char ch)
-{
- std::string result;
- switch(ch) {
- case '\t':
- result = "\\t";
- break;
- case '\b':
- result = "\\b";
- break;
- case '\r':
- result = "\\r";
- break;
- case '\n':
- result = "\\n";
- break;
- case '\f':
- result = "\\f";
- break;
- case '\v':
- result = "\\v";
- break;
- case '\\':
- result = "\\\\";
- break;
- case '\'':
- result = "\\'";
- break;
- default:
- if (std::isprint(ch))
- {
- result = ch;
- }
- else
- {
- result = "\\x";
- char buffer[3];
- result += ltoa(ch, buffer, 16);
- }
- break;
- }
- return result;
-}
-
-inline std::string get_charcode(wchar_t ch)
-{
- if (ch & ~0xff)
- {
- std::string result;
- return result; // not implemented yet
- }
- return get_charcode(static_cast<char>(ch & 0xff));
-}
-
 template<typename CharT>
 void generate_re2c (const basic_state_machine<CharT> &state_machine_,
     std::ostream &os_, const bool use_pointers_ = false,
     const bool skip_unknown_ = true, const bool optimise_parameters_ = true,
     const char *name_ = "next_token")
 {
+ typedef typename lexertl::basic_string_token<CharT> string_token;
     const detail::internals &sm_ = state_machine_.data ();
 
     if (sm_._lookup->size () == 0)
@@ -227,6 +174,7 @@
         }
 
         os_ << " default:\n";
+ os_ << " throw std::runtime_error (\"Invalid start state!\")\n";
         os_ << " break;\n";
         os_ << " }\n\n";
     }
@@ -253,10 +201,8 @@
             const std::size_t transitions_ = iter_->transitions;
             std::size_t t_ = 0;
 
- if (dfas_ > 1 || dfa_ != 0 || state_ != 0)
- {
- os_ << "state" << dfa_ << '_' << state_ << ":\n";
- }
+ os_ << "state" << dfa_ << '_' << state_ << ":\n";
+
             if (iter_->end_state)
             {
                 os_ << " end_state_ = true;\n";
@@ -274,22 +220,23 @@
                 {
                     os_ << " end_bol_ = bol_;\n";
                 }
+
+ if (transitions_) os_ << '\n';
             }
 
- if (t_ < transitions_ ||
- iter_->bol_index != boost::lexer::npos ||
- iter_->eol_index != boost::lexer::npos)
+ if (t_ < transitions_ || iter_->bol_index != lexertl::npos ||
+ iter_->eol_index != lexertl::npos)
             {
- os_ << " if (curr_ == end_) goto end;\n";
+ os_ << " if (curr_ == end_) goto end;\n\n";
                 os_ << " ch_ = *curr_;\n";
 
- if (iter_->bol_index != boost::lexer::npos)
+ if (iter_->bol_index != lexertl::npos)
                 {
                     os_ << "\n if (bol_) goto state" << dfa_ << '_' <<
                         iter_->bol_index << ";\n\n";
                 }
 
- if (iter_->eol_index != boost::lexer::npos)
+ if (iter_->eol_index != lexertl::npos)
                 {
                     os_ << "\n if (ch_ == '\n') goto state" << dfa_ << '_' <<
                         iter_->eol_index << ";\n\n";
@@ -340,17 +287,28 @@
 
                         if (range_)
                         {
+ typename string_token::string temp_;
+
                             if (iter_->token._negated)
                             {
                                 os_ << "!";
                             }
 
- os_ << "(ch_ >= '" << get_charcode(start_char_);
- os_ << "' && ch_ <= '" << get_charcode(curr_char_) << "\')";
+ string_token::escape_char (start_char_, temp_);
+ os_ << "(ch_ >= '" << temp_;
+#if defined _MSC_VER && _MSC_VER <= 1200
+ temp_.erase ();
+#else
+ temp_.clear ();
+#endif
+ string_token::escape_char (curr_char_, temp_);
+ os_ << "' && ch_ <= '" << temp_ << "')";
                             range_ = false;
                         }
                         else
                         {
+ typename string_token::string temp_;
+
                             os_ << "ch_ ";
 
                             if (iter_->token._negated)
@@ -362,20 +320,21 @@
                                 os_ << "==";
                             }
 
- os_ << " '" << get_charcode(curr_char_) << "'";
+ string_token::escape_char (curr_char_, temp_);
+ os_ << " '" << temp_ << "'";
                         }
                     }
                 }
 
                 os_ << ") goto state" << dfa_ << '_' << iter_->goto_state <<
- ";\n";
+ ";\n\n";
                 ++iter_;
             }
 
- if (transitions_) os_ << '\n';
-
- os_ << " goto end;\n";
- os_ << '\n';
+ if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
+ {
+ os_ << " goto end;\n";
+ }
 
             if (transitions_ == 0) ++iter_;
         }

Modified: trunk/boost/spirit/home/support/detail/lexer/string_token.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/string_token.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/string_token.hpp 2009-11-11 16:29:20 EST (Wed, 11 Nov 2009)
@@ -146,6 +146,78 @@
         }
     }
 
+ static void escape_char (const CharT ch_, string &out_)
+ {
+ switch (ch_)
+ {
+ case '\0':
+ out_ += '\\';
+ out_ += '0';
+ break;
+ case '\a':
+ out_ += '\\';
+ out_ += 'a';
+ break;
+ case '\b':
+ out_ += '\\';
+ out_ += 'b';
+ break;
+ case 27:
+ out_ += '\\';
+ out_ += 'x';
+ out_ += '1';
+ out_ += 'b';
+ break;
+ case '\f':
+ out_ += '\\';
+ out_ += 'f';
+ break;
+ case '\n':
+ out_ += '\\';
+ out_ += 'n';
+ break;
+ case '\r':
+ out_ += '\\';
+ out_ += 'r';
+ break;
+ case '\t':
+ out_ += '\\';
+ out_ += 't';
+ break;
+ case '\v':
+ out_ += '\\';
+ out_ += 'v';
+ break;
+ case '\\':
+ out_ += '\\';
+ out_ += '\\';
+ break;
+ case '"':
+ out_ += '\\';
+ out_ += '"';
+ break;
+ default:
+ {
+ if (ch_ < 32 && ch_ >= 0)
+ {
+ std::basic_stringstream<CharT> ss_;
+
+ out_ += '\\';
+ out_ += 'x';
+ ss_ << std::hex <<
+ static_cast<std::size_t> (ch_);
+ out_ += ss_.str ();
+ }
+ else
+ {
+ out_ += ch_;
+ }
+
+ break;
+ }
+ }
+ }
+
 private:
     void intersect_same_types (basic_string_token &rhs_,
         basic_string_token &overlap_)


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk