Boost-Commit :
Subject: [Boost-commit] svn:boost r57582 - in trunk/boost/spirit/home: lex/lexer/lexertl support/detail/lexer
From: hartmut.kaiser_at_[hidden]
Date: 2009-11-11 16:29:22
Author: hkaiser
Date: 2009-11-11 16:29:20 EST (Wed, 11 Nov 2009)
New Revision: 57582
URL: http://svn.boost.org/trac/boost/changeset/57582
Log:
Spirit: updating static lexer support
Text files modified:
trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp | 117 ++++++++-------------------------------
trunk/boost/spirit/home/support/detail/lexer/debug.hpp | 70 -----------------------
trunk/boost/spirit/home/support/detail/lexer/generate_re2c.hpp | 103 ++++++++++------------------------
trunk/boost/spirit/home/support/detail/lexer/string_token.hpp | 72 ++++++++++++++++++++++++
4 files changed, 130 insertions(+), 232 deletions(-)
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp 2009-11-11 16:29:20 EST (Wed, 11 Nov 2009)
@@ -16,11 +16,8 @@
#include <boost/spirit/home/support/detail/lexer/rules.hpp>
#include <boost/spirit/home/support/detail/lexer/size_t.hpp>
#include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
+#include <boost/spirit/home/support/detail/lexer/debug.hpp>
#include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
-#include <boost/spirit/home/karma/numeric/uint.hpp>
-#include <boost/spirit/home/karma/string/lit.hpp>
-#include <boost/spirit/home/karma/operator/sequence.hpp>
-#include <boost/spirit/home/karma/generate.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>
@@ -536,61 +533,14 @@
}
///////////////////////////////////////////////////////////////////////////
- inline std::string get_charcode(char ch)
+ template <typename Char>
+ inline std::string get_charlit(Char ch)
{
- std::string result;
- switch(ch) {
- case '\t':
- result = "\\t";
- break;
- case '\b':
- result = "\\b";
- break;
- case '\r':
- result = "\\r";
- break;
- case '\n':
- result = "\\n";
- break;
- case '\f':
- result = "\\f";
- break;
- case '\v':
- result = "\\v";
- break;
- case '\\':
- result = "\\\\";
- break;
- case '\'':
- result = "\\'";
- break;
- default:
- if (std::isprint(ch))
- {
- result = ch;
- }
- else
- {
- typedef karma::uint_generator<unsigned char, 16> uintgen_type;
- std::back_insert_iterator<std::string> sink(result);
- karma::generate(sink, "\\x" << uintgen_type()(ch));
- }
- break;
- }
+ std::basic_string<Char> result;
+ boost::lexer::basic_string_token<Char>::escape_char (ch, result);
return result;
}
- inline std::string get_charcode(wchar_t ch)
- {
- if (ch & ~0xff) {
- BOOST_ASSERT(false); // not implemented yet
-
- std::string result;
- return result;
- }
- return get_charcode(static_cast<char>(ch & 0xff));
- }
-
///////////////////////////////////////////////////////////////////////////
template <typename Char>
bool generate_function_body_switch(std::ostream & os_
@@ -657,7 +607,7 @@
}
os_ << " " << ((lookups_ == 256) ? "char" : "wchar_t")
- << " ch_ = 0;\n\n";
+ << " ch_ = 0;\n";
for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
{
std::size_t const states_ = iter_->states;
@@ -667,10 +617,8 @@
std::size_t const transitions_ = iter_->transitions;
std::size_t t_ = 0;
- if (dfas_ > 1 || dfa_ != 0 || state_ != 0)
- {
- os_ << "state" << dfa_ << '_' << state_ << ":\n";
- }
+ os_ << "\nstate" << dfa_ << '_' << state_ << ":\n";
+
if (iter_->end_state)
{
os_ << " end_state_ = true;\n";
@@ -688,6 +636,8 @@
{
os_ << " end_bol_ = bol;\n";
}
+
+ if (transitions_) os_ << '\n';
}
if (t_ < transitions_ ||
@@ -736,43 +686,28 @@
{
if (!first_char_)
{
- if (iter_->token._negated)
- {
- os_ << " && ";
- }
- else
- {
- os_ << " || ";
- }
+ os_ << ((iter_->token._negated) ? " && " : " || ");
+ }
+ else
+ {
+ first_char_ = false;
}
-
- first_char_ = false;
-
if (range_)
{
if (iter_->token._negated)
{
os_ << "!";
}
-
- os_ << "(ch_ >= '" << get_charcode(start_char_);
- os_ << "' && ch_ <= '" << get_charcode(curr_char_) << "')";
+ os_ << "(ch_ >= '" << get_charlit(start_char_)
+ << "' && ch_ <= '"
+ << get_charlit(curr_char_) << "')";
range_ = false;
}
else
{
- os_ << "ch_ ";
-
- if (iter_->token._negated)
- {
- os_ << "!=";
- }
- else
- {
- os_ << "==";
- }
-
- os_ << " '" << get_charcode(curr_char_) << "'";
+ os_ << "ch_ "
+ << ((iter_->token._negated) ? "!=" : "==")
+ << " '" << get_charlit(curr_char_) << "'";
}
}
}
@@ -782,16 +717,16 @@
++iter_;
}
- if (transitions_) os_ << '\n';
-
- os_ << " goto end;\n";
- os_ << '\n';
+ if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
+ {
+ os_ << " goto end;\n";
+ }
if (transitions_ == 0) ++iter_;
}
}
- os_ << "end:\n";
+ os_ << "\nend:\n";
os_ << " if (end_state_)\n";
os_ << " {\n";
os_ << " // return longest match\n";
Modified: trunk/boost/spirit/home/support/detail/lexer/debug.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/debug.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/debug.hpp 2009-11-11 16:29:20 EST (Wed, 11 Nov 2009)
@@ -39,75 +39,7 @@
while (size_)
{
- switch (*ptr_)
- {
- case '\0':
- out_ += '\\';
- out_ += '0';
- break;
- case '\a':
- out_ += '\\';
- out_ += 'a';
- break;
- case '\b':
- out_ += '\\';
- out_ += 'b';
- break;
- case 27:
- out_ += '\\';
- out_ += 'x';
- out_ += '1';
- out_ += 'b';
- break;
- case '\f':
- out_ += '\\';
- out_ += 'f';
- break;
- case '\n':
- out_ += '\\';
- out_ += 'n';
- break;
- case '\r':
- out_ += '\\';
- out_ += 'r';
- break;
- case '\t':
- out_ += '\\';
- out_ += 't';
- break;
- case '\v':
- out_ += '\\';
- out_ += 'v';
- break;
- case '\\':
- out_ += '\\';
- out_ += '\\';
- break;
- case '"':
- out_ += '\\';
- out_ += '"';
- break;
- default:
- {
- if (*ptr_ < 32 && *ptr_ >= 0)
- {
- stringstream ss_;
-
- out_ += '\\';
- out_ += 'x';
- ss_ << std::hex <<
- static_cast<std::size_t> (*ptr_);
- out_ += ss_.str ();
- }
- else
- {
- out_ += *ptr_;
- }
-
- break;
- }
- }
-
+ basic_string_token<CharT>::escape_char (*ptr_, out_);
++ptr_;
--size_;
}
Modified: trunk/boost/spirit/home/support/detail/lexer/generate_re2c.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/generate_re2c.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/generate_re2c.hpp 2009-11-11 16:29:20 EST (Wed, 11 Nov 2009)
@@ -19,66 +19,13 @@
{
namespace lexer
{
-inline std::string get_charcode(char ch)
-{
- std::string result;
- switch(ch) {
- case '\t':
- result = "\\t";
- break;
- case '\b':
- result = "\\b";
- break;
- case '\r':
- result = "\\r";
- break;
- case '\n':
- result = "\\n";
- break;
- case '\f':
- result = "\\f";
- break;
- case '\v':
- result = "\\v";
- break;
- case '\\':
- result = "\\\\";
- break;
- case '\'':
- result = "\\'";
- break;
- default:
- if (std::isprint(ch))
- {
- result = ch;
- }
- else
- {
- result = "\\x";
- char buffer[3];
- result += ltoa(ch, buffer, 16);
- }
- break;
- }
- return result;
-}
-
-inline std::string get_charcode(wchar_t ch)
-{
- if (ch & ~0xff)
- {
- std::string result;
- return result; // not implemented yet
- }
- return get_charcode(static_cast<char>(ch & 0xff));
-}
-
template<typename CharT>
void generate_re2c (const basic_state_machine<CharT> &state_machine_,
std::ostream &os_, const bool use_pointers_ = false,
const bool skip_unknown_ = true, const bool optimise_parameters_ = true,
const char *name_ = "next_token")
{
+ typedef typename lexertl::basic_string_token<CharT> string_token;
const detail::internals &sm_ = state_machine_.data ();
if (sm_._lookup->size () == 0)
@@ -227,6 +174,7 @@
}
os_ << " default:\n";
+ os_ << " throw std::runtime_error (\"Invalid start state!\")\n";
os_ << " break;\n";
os_ << " }\n\n";
}
@@ -253,10 +201,8 @@
const std::size_t transitions_ = iter_->transitions;
std::size_t t_ = 0;
- if (dfas_ > 1 || dfa_ != 0 || state_ != 0)
- {
- os_ << "state" << dfa_ << '_' << state_ << ":\n";
- }
+ os_ << "state" << dfa_ << '_' << state_ << ":\n";
+
if (iter_->end_state)
{
os_ << " end_state_ = true;\n";
@@ -274,22 +220,23 @@
{
os_ << " end_bol_ = bol_;\n";
}
+
+ if (transitions_) os_ << '\n';
}
- if (t_ < transitions_ ||
- iter_->bol_index != boost::lexer::npos ||
- iter_->eol_index != boost::lexer::npos)
+ if (t_ < transitions_ || iter_->bol_index != lexertl::npos ||
+ iter_->eol_index != lexertl::npos)
{
- os_ << " if (curr_ == end_) goto end;\n";
+ os_ << " if (curr_ == end_) goto end;\n\n";
os_ << " ch_ = *curr_;\n";
- if (iter_->bol_index != boost::lexer::npos)
+ if (iter_->bol_index != lexertl::npos)
{
os_ << "\n if (bol_) goto state" << dfa_ << '_' <<
iter_->bol_index << ";\n\n";
}
- if (iter_->eol_index != boost::lexer::npos)
+ if (iter_->eol_index != lexertl::npos)
{
os_ << "\n if (ch_ == '\n') goto state" << dfa_ << '_' <<
iter_->eol_index << ";\n\n";
@@ -340,17 +287,28 @@
if (range_)
{
+ typename string_token::string temp_;
+
if (iter_->token._negated)
{
os_ << "!";
}
- os_ << "(ch_ >= '" << get_charcode(start_char_);
- os_ << "' && ch_ <= '" << get_charcode(curr_char_) << "\')";
+ string_token::escape_char (start_char_, temp_);
+ os_ << "(ch_ >= '" << temp_;
+#if defined _MSC_VER && _MSC_VER <= 1200
+ temp_.erase ();
+#else
+ temp_.clear ();
+#endif
+ string_token::escape_char (curr_char_, temp_);
+ os_ << "' && ch_ <= '" << temp_ << "')";
range_ = false;
}
else
{
+ typename string_token::string temp_;
+
os_ << "ch_ ";
if (iter_->token._negated)
@@ -362,20 +320,21 @@
os_ << "==";
}
- os_ << " '" << get_charcode(curr_char_) << "'";
+ string_token::escape_char (curr_char_, temp_);
+ os_ << " '" << temp_ << "'";
}
}
}
os_ << ") goto state" << dfa_ << '_' << iter_->goto_state <<
- ";\n";
+ ";\n\n";
++iter_;
}
- if (transitions_) os_ << '\n';
-
- os_ << " goto end;\n";
- os_ << '\n';
+ if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
+ {
+ os_ << " goto end;\n";
+ }
if (transitions_ == 0) ++iter_;
}
Modified: trunk/boost/spirit/home/support/detail/lexer/string_token.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/string_token.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/string_token.hpp 2009-11-11 16:29:20 EST (Wed, 11 Nov 2009)
@@ -146,6 +146,78 @@
}
}
+ static void escape_char (const CharT ch_, string &out_)
+ {
+ switch (ch_)
+ {
+ case '\0':
+ out_ += '\\';
+ out_ += '0';
+ break;
+ case '\a':
+ out_ += '\\';
+ out_ += 'a';
+ break;
+ case '\b':
+ out_ += '\\';
+ out_ += 'b';
+ break;
+ case 27:
+ out_ += '\\';
+ out_ += 'x';
+ out_ += '1';
+ out_ += 'b';
+ break;
+ case '\f':
+ out_ += '\\';
+ out_ += 'f';
+ break;
+ case '\n':
+ out_ += '\\';
+ out_ += 'n';
+ break;
+ case '\r':
+ out_ += '\\';
+ out_ += 'r';
+ break;
+ case '\t':
+ out_ += '\\';
+ out_ += 't';
+ break;
+ case '\v':
+ out_ += '\\';
+ out_ += 'v';
+ break;
+ case '\\':
+ out_ += '\\';
+ out_ += '\\';
+ break;
+ case '"':
+ out_ += '\\';
+ out_ += '"';
+ break;
+ default:
+ {
+ if (ch_ < 32 && ch_ >= 0)
+ {
+ std::basic_stringstream<CharT> ss_;
+
+ out_ += '\\';
+ out_ += 'x';
+ ss_ << std::hex <<
+ static_cast<std::size_t> (ch_);
+ out_ += ss_.str ();
+ }
+ else
+ {
+ out_ += ch_;
+ }
+
+ break;
+ }
+ }
+ }
+
private:
void intersect_same_types (basic_string_token &rhs_,
basic_string_token &overlap_)
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk