Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r57562 - trunk/boost/spirit/home/lex/lexer/lexertl
From: hartmut.kaiser_at_[hidden]
Date: 2009-11-10 22:56:11


Author: hkaiser
Date: 2009-11-10 22:56:11 EST (Tue, 10 Nov 2009)
New Revision: 57562
URL: http://svn.boost.org/trac/boost/changeset/57562

Log:
Spirit: re-added missing file
Added:
   trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp (contents, props changed)

Added: trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp 2009-11-10 22:56:11 EST (Tue, 10 Nov 2009)
@@ -0,0 +1,969 @@
+// Copyright (c) 2008-2009 Ben Hanson
+// Copyright (c) 2008-2009 Hartmut Kaiser
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM)
+#define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM
+
+#if defined(_MSC_VER)
+#pragma once
+#endif
+
+#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
+#include <boost/spirit/home/support/detail/lexer/consts.hpp>
+#include <boost/spirit/home/support/detail/lexer/rules.hpp>
+#include <boost/spirit/home/support/detail/lexer/size_t.hpp>
+#include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
+#include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
+#include <boost/algorithm/string.hpp>
+#include <boost/lexical_cast.hpp>
+
+///////////////////////////////////////////////////////////////////////////////
+namespace boost { namespace spirit { namespace lex { namespace lexertl
+{
+ namespace detail
+ {
+
+    // Emit an 80 character wide '/' separator line; true while os_ is good.
+    inline bool generate_delimiter(std::ostream &os_)
+    {
+        std::string const separator(80, '/');
+        return (os_ << separator << "\n").good();
+    }
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Write the table of lexer state names and the lexer state count to os_.
+    // The rules keep their states sorted by name, while the emitted table is
+    // indexed by state id, so the states are re-sorted by id first.
+    template <typename Char>
+    inline bool
+    generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_
+      , std::ostream &os_, char const* name_suffix)
+    {
+        typedef typename
+            boost::lexer::basic_rules<Char>::string_size_t_map state_map_type;
+        typedef typename state_map_type::const_iterator state_iterator;
+        typedef std::map<std::size_t, char const*> names_by_id_type;
+        typedef names_by_id_type::iterator name_iterator;
+
+        // build the id -> name view of the state map
+        state_map_type const& states = rules_.statemap();
+        names_by_id_type names_by_id;
+        for (state_iterator it = states.begin(); it != states.end(); ++it)
+        {
+            names_by_id.insert(
+                names_by_id_type::value_type(it->second, it->first.c_str()));
+        }
+        std::size_t const state_count = states.size();
+        std::string suffix(name_suffix[0] ? "_" : "");
+        suffix += name_suffix;
+
+        generate_delimiter(os_);
+        os_ << "// this table defines the names of the lexer states\n";
+        os_ << "char const* const lexer_state_names" << suffix
+            << "[" << state_count << "] = \n{\n";
+        // ascending id order; ids without a state get a null entry
+        std::size_t next_id = 0;
+        name_iterator entry = names_by_id.begin();
+        while (entry != names_by_id.end())
+        {
+            while (next_id < entry->first)
+            {
+                os_ << " 0, // \"<undefined state>\"\n";
+                ++next_id;
+            }
+            os_ << " \"" << entry->second << "\"";
+            ++entry;
+            ++next_id;
+            os_ << (entry != names_by_id.end() ? ",\n" : "\n");   // no final comma
+        }
+        os_ << "};\n\n";
+
+        generate_delimiter(os_);
+        os_ << "// this variable defines the number of lexer states\n";
+        os_ << "std::size_t const lexer_state_count" << suffix
+            << " = " << state_count << ";\n\n";
+        return os_.good();
+    }
+
+    // Emit the 'lexer<suffix>' struct: a generic accessor bundling the static
+    // version, the BOL/EOL feature flags, the state tables and next_token.
+    inline bool
+    generate_cpp_state_table (std::ostream &os_, char const* name_suffix
+      , bool bol, bool eol)
+    {
+        std::string const suffix =
+            std::string(name_suffix[0] ? "_" : "") + name_suffix;
+        generate_delimiter(os_);
+        os_ << "// this defines a generic accessors for the information above\n"
+            << "struct lexer" << suffix << "\n{\n"
+            << " // version number and feature-set of compatible static lexer engine\n"
+            << " enum\n" << " {\n static_version = ";
+        os_ << boost::lexical_cast<std::string>(SPIRIT_STATIC_LEXER_VERSION) << ",\n"
+            << " supports_bol = " << std::boolalpha << bol << ",\n"
+            << " supports_eol = " << eol << "\n"
+            << " };\n\n";
+        os_ << " // return the number of lexer states\n"
+            << " static std::size_t state_count()\n"
+            << " {\n return lexer_state_count" << suffix << "; \n }\n\n";
+        os_ << " // return the name of the lexer state as given by 'idx'\n"
+            << " static char const* state_name(std::size_t idx)\n"
+            << " {\n return lexer_state_names" << suffix << "[idx]; \n }\n\n";
+        os_ << " // return the next matched token\n"
+            << " template<typename Iterator>\n"
+            << " static std::size_t next(std::size_t &start_state_, bool& bol_\n"
+            << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n"
+            << " {\n return next_token" << suffix
+            << "(start_state_, bol_, start_token_, end_, unique_id_);\n }\n"
+            << "};\n\n";
+        return os_.good();
+    }
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Generate the body of the next_token function as a table driven matcher:
+    // the lookup and DFA tables of sm_ are written as static arrays, followed
+    // by the loop walking them and the code returning the longest match.
+    // BOL/EOL handling is only emitted if sm_ has seen such assertions. Line
+    // feed comparisons are emitted as the escape sequence '\n', so that no
+    // raw line break ends up inside a generated character literal.
+    // Returns os_.good() once everything has been written.
+    template <typename Char>
+    bool generate_function_body_dfa(std::ostream & os_
+      , boost::lexer::basic_state_machine<Char> const &sm_)
+    {
+        std::size_t const dfas_ = sm_.data()._dfa->size();
+        std::size_t const lookups_ = sm_.data()._lookup->front()->size();
+
+        os_ << " enum {end_state_index, id_index, unique_id_index, "
+          "state_index, bol_index,\n";
+        os_ << " eol_index, dead_state_index, dfa_offset};\n\n";
+        os_ << " static std::size_t const npos = "
+          "static_cast<std::size_t>(~0);\n";
+        // emit the lookup and DFA tables, eight values per row
+        if (dfas_ > 1)
+        {
+            for (std::size_t state_ = 0; state_ < dfas_; ++state_)
+            {
+                std::size_t i_ = 0;
+                std::size_t j_ = 1;
+                std::size_t count_ = lookups_ / 8;
+                std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front();
+                std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front();
+
+                os_ << " static std::size_t const lookup" << state_
+                  << "_[" << lookups_ << "] = {\n ";
+                for (/**/; i_ < count_; ++i_)
+                {
+                    std::size_t const index_ = i_ * 8;
+                    os_ << lookup_[index_];
+                    for (/**/; j_ < 8; ++j_)
+                    {
+                        os_ << ", " << lookup_[index_ + j_];
+                    }
+                    if (i_ < count_ - 1)
+                    {
+                        os_ << ",\n ";
+                    }
+                    j_ = 1;
+                }
+                os_ << " };\n";
+
+                count_ = sm_.data()._dfa[state_]->size ();
+                os_ << " static const std::size_t dfa" << state_ << "_["
+                  << count_ << "] = {\n ";
+                count_ /= 8;
+                for (i_ = 0; i_ < count_; ++i_)
+                {
+                    std::size_t const index_ = i_ * 8;
+                    os_ << dfa_[index_];
+                    for (j_ = 1; j_ < 8; ++j_)
+                    {
+                        os_ << ", " << dfa_[index_ + j_];
+                    }
+                    if (i_ < count_ - 1)
+                    {
+                        os_ << ",\n ";
+                    }
+                }
+                // trailing partial row (table size not a multiple of eight)
+                std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8;
+                if (mod_)
+                {
+                    std::size_t const index_ = count_ * 8;
+                    if (count_)
+                    {
+                        os_ << ",\n ";
+                    }
+                    os_ << dfa_[index_];
+                    for (j_ = 1; j_ < mod_; ++j_)
+                    {
+                        os_ << ", " << dfa_[index_ + j_];
+                    }
+                }
+                os_ << " };\n";
+            }
+            // per start state arrays selecting the tables emitted above
+            std::size_t count_ = sm_.data()._dfa_alphabet.size();
+            std::size_t i_ = 1;
+
+            os_ << " static std::size_t const* lookup_arr_[" << count_
+              << "] = { lookup0_";
+            for (i_ = 1; i_ < count_; ++i_)
+            {
+                os_ << ", " << "lookup" << i_ << "_";
+            }
+            os_ << " };\n";
+
+            os_ << " static std::size_t const dfa_alphabet_arr_["
+              << count_ << "] = { ";
+            os_ << sm_.data()._dfa_alphabet.front ();
+            for (i_ = 1; i_ < count_; ++i_)
+            {
+                os_ << ", " << sm_.data()._dfa_alphabet[i_];
+            }
+            os_ << " };\n";
+
+            os_ << " static std::size_t const* dfa_arr_[" << count_
+              << "] = { ";
+            os_ << "dfa0_";
+            for (i_ = 1; i_ < count_; ++i_)
+            {
+                os_ << ", " << "dfa" << i_ << "_";
+            }
+            os_ << " };\n";
+        }
+        else
+        {
+            std::size_t const* lookup_ = &sm_.data()._lookup[0]->front();
+            std::size_t const* dfa_ = &sm_.data()._dfa[0]->front();
+            std::size_t i_ = 0;
+            std::size_t j_ = 1;
+            std::size_t count_ = lookups_ / 8;
+
+            os_ << " static std::size_t const lookup_[";
+            os_ << sm_.data()._lookup[0]->size() << "] = {\n ";
+            for (/**/; i_ < count_; ++i_)
+            {
+                const std::size_t index_ = i_ * 8;
+                os_ << lookup_[index_];
+                for (/**/; j_ < 8; ++j_)
+                {
+                    os_ << ", " << lookup_[index_ + j_];
+                }
+                if (i_ < count_ - 1)
+                {
+                    os_ << ",\n ";
+                }
+                j_ = 1;
+            }
+            os_ << " };\n";
+
+            os_ << " static std::size_t const dfa_alphabet_ = "
+              << sm_.data()._dfa_alphabet.front () << ";\n";
+            os_ << " static std::size_t const dfa_["
+              << sm_.data()._dfa[0]->size () << "] = {\n ";
+            count_ = sm_.data()._dfa[0]->size () / 8;
+            for (i_ = 0; i_ < count_; ++i_)
+            {
+                const std::size_t index_ = i_ * 8;
+                os_ << dfa_[index_];
+                for (j_ = 1; j_ < 8; ++j_)
+                {
+                    os_ << ", " << dfa_[index_ + j_];
+                }
+                if (i_ < count_ - 1)
+                {
+                    os_ << ",\n ";
+                }
+            }
+            // trailing partial row (table size not a multiple of eight)
+            const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8;
+            if (mod_)
+            {
+                const std::size_t index_ = count_ * 8;
+                if (count_)
+                {
+                    os_ << ",\n ";
+                }
+                os_ << dfa_[index_];
+                for (j_ = 1; j_ < mod_; ++j_)
+                {
+                    os_ << ", " << dfa_[index_ + j_];
+                }
+            }
+            os_ << " };\n";
+        }
+
+        os_ << "\n if (start_token_ == end_)\n";
+        os_ << " {\n";
+        os_ << " unique_id_ = npos;\n";
+        os_ << " return 0;\n";
+        os_ << " }\n\n";
+        if (sm_.data()._seen_BOL_assertion)
+            os_ << " bool bol = bol_;\n\n";
+
+        if (dfas_ > 1)
+        {
+            os_ << "again:\n";
+            os_ << " std::size_t const* lookup_ = lookup_arr_[start_state_];\n";
+            os_ << " std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n";
+            os_ << " std::size_t const*dfa_ = dfa_arr_[start_state_];\n";
+        }
+
+        os_ << " std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n";
+        os_ << " Iterator curr_ = start_token_;\n";
+        os_ << " bool end_state_ = *ptr_ != 0;\n";
+        os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
+        os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
+        if (dfas_ > 1)
+            os_ << " std::size_t end_start_state_ = start_state_;\n";
+        if (sm_.data()._seen_BOL_assertion)
+            os_ << " bool end_bol_ = bol_;\n";
+        os_ << " Iterator end_token_ = start_token_;\n\n";
+
+        os_ << " while (curr_ != end_)\n";
+        os_ << " {\n";
+
+        if (sm_.data()._seen_BOL_assertion)
+        {
+            os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n\n";
+        }
+
+        if (sm_.data()._seen_EOL_assertion)
+        {
+            os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
+        }
+
+        if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
+        {
+            os_ << " if (BOL_state_ && bol)\n";
+            os_ << " {\n";
+            os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
+            os_ << " }\n";
+            os_ << " else if (EOL_state_ && *curr_ == '\\n')\n";
+            os_ << " {\n";
+            os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
+            os_ << " }\n";
+            os_ << " else\n";
+            os_ << " {\n";
+            if (lookups_ == 256)
+            {
+                os_ << " unsigned char index = \n";
+                os_ << " static_cast<unsigned char>(*curr_++);\n";
+            }
+            else
+            {
+                os_ << " std::size_t index = *curr_++;\n";
+            }
+            os_ << " bol = (index == '\\n') ? true : false;\n";
+            os_ << " std::size_t const state_ = ptr_[\n";
+            os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
+
+            os_ << '\n';
+            os_ << " if (state_ == 0) break;\n";
+            os_ << '\n';
+            os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+            os_ << " }\n\n";
+        }
+        else if (sm_.data()._seen_BOL_assertion)
+        {
+            os_ << " if (BOL_state_ && bol)\n";
+            os_ << " {\n";
+            os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
+            os_ << " }\n";
+            os_ << " else\n";
+            os_ << " {\n";
+            if (lookups_ == 256)
+            {
+                os_ << " unsigned char index = \n";
+                os_ << " static_cast<unsigned char>(*curr_++);\n";
+            }
+            else
+            {
+                os_ << " std::size_t index = *curr_++;\n";
+            }
+            os_ << " bol = (index == '\\n') ? true : false;\n";
+            os_ << " std::size_t const state_ = ptr_[\n";
+            os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
+
+            os_ << '\n';
+            os_ << " if (state_ == 0) break;\n";
+            os_ << '\n';
+            os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+            os_ << " }\n\n";
+        }
+        else if (sm_.data()._seen_EOL_assertion)
+        {
+            os_ << " if (EOL_state_ && *curr_ == '\\n')\n";
+            os_ << " {\n";
+            os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
+            os_ << " }\n";
+            os_ << " else\n";
+            os_ << " {\n";
+            if (lookups_ == 256)
+            {
+                os_ << " unsigned char index = \n";
+                os_ << " static_cast<unsigned char>(*curr_++);\n";
+            }
+            else
+            {
+                os_ << " std::size_t index = *curr_++;\n";
+            }
+            // no 'bol' update: 'bol' is only declared if a BOL assertion was seen
+            os_ << " std::size_t const state_ = ptr_[\n";
+            os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
+
+            os_ << '\n';
+            os_ << " if (state_ == 0) break;\n";
+            os_ << '\n';
+            os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+            os_ << " }\n\n";
+        }
+        else
+        {
+            os_ << " std::size_t const state_ =\n";
+
+            if (lookups_ == 256)
+            {
+                os_ << " ptr_[lookup_["
+                  "static_cast<unsigned char>(*curr_++)]];\n";
+            }
+            else
+            {
+                os_ << " ptr_[lookup_[*curr_++]];\n";
+            }
+
+            os_ << '\n';
+            os_ << " if (state_ == 0) break;\n";
+            os_ << '\n';
+            os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n";
+        }
+
+        os_ << " if (*ptr_)\n";
+        os_ << " {\n";
+        os_ << " end_state_ = true;\n";
+        os_ << " id_ = *(ptr_ + id_index);\n";
+        os_ << " uid_ = *(ptr_ + unique_id_index);\n";
+        if (dfas_ > 1)
+        {
+            os_ << " end_start_state_ = *(ptr_ + state_index);\n";
+        }
+        if (sm_.data()._seen_BOL_assertion)
+        {
+            os_ << " end_bol_ = bol;\n";
+        }
+        os_ << " end_token_ = curr_;\n";
+        os_ << " }\n";
+        os_ << " }\n\n";
+
+        if (sm_.data()._seen_EOL_assertion)
+        {
+            os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
+
+            os_ << " if (EOL_state_ && curr_ == end_)\n";
+            os_ << " {\n";
+            os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n";
+
+            os_ << " if (*ptr_)\n";
+            os_ << " {\n";
+            os_ << " end_state_ = true;\n";
+            os_ << " id_ = *(ptr_ + id_index);\n";
+            os_ << " uid_ = *(ptr_ + unique_id_index);\n";
+            if (dfas_ > 1)
+            {
+                os_ << " end_start_state_ = *(ptr_ + state_index);\n";
+            }
+            if (sm_.data()._seen_BOL_assertion)
+            {
+                os_ << " end_bol_ = bol;\n";
+            }
+            os_ << " end_token_ = curr_;\n";
+            os_ << " }\n";
+            os_ << " }\n\n";
+        }
+
+        os_ << " if (end_state_)\n";
+        os_ << " {\n";
+        os_ << " // return longest match\n";
+        os_ << " start_token_ = end_token_;\n";
+
+        if (dfas_ > 1)
+        {
+            os_ << " start_state_ = end_start_state_;\n";
+            os_ << " if (id_ == 0)\n";
+            os_ << " {\n";
+            if (sm_.data()._seen_BOL_assertion)
+            {
+                os_ << " bol = end_bol_;\n";
+            }
+            os_ << " goto again;\n";
+            os_ << " }\n";
+            if (sm_.data()._seen_BOL_assertion)
+            {
+                os_ << " else\n";
+                os_ << " {\n";
+                os_ << " bol_ = end_bol_;\n";
+                os_ << " }\n";
+            }
+        }
+        else if (sm_.data()._seen_BOL_assertion)
+        {
+            os_ << " bol_ = end_bol_;\n";
+        }
+
+        os_ << " }\n";
+        os_ << " else\n";
+        os_ << " {\n";
+
+        if (sm_.data()._seen_BOL_assertion)
+        {
+            os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n";
+        }
+
+        os_ << " id_ = npos;\n";
+        os_ << " uid_ = npos;\n";
+        os_ << " }\n\n";
+
+        os_ << " unique_id_ = uid_;\n";
+        os_ << " return id_;\n";
+        return os_.good();
+    }
+
+ ///////////////////////////////////////////////////////////////////////////
+    // Return the spelling of ch for use inside a generated character literal:
+    // the common control characters, the backslash and the single quote get
+    // their symbolic escape, other printable characters are returned as is
+    // and everything else becomes a two digit hexadecimal escape ("\xNN").
+    inline std::string get_charcode(char ch)
+    {
+        switch(ch) {
+        case '\t':
+            return "\\t";
+        case '\b':
+            return "\\b";
+        case '\r':
+            return "\\r";
+        case '\n':
+            return "\\n";
+        case '\f':
+            return "\\f";
+        case '\v':
+            return "\\v";
+        case '\\':
+            return "\\\\";
+        case '\'':
+            return "\\'";
+        default:
+            break;
+        }
+
+        // the <cctype> functions are only defined for values representable
+        // as unsigned char, and a plain (possibly negative) char would be
+        // sign extended when formatted as a number
+        unsigned char const code = static_cast<unsigned char>(ch);
+        if (std::isprint(code))
+        {
+            return std::string(1, ch);
+        }
+
+        // always emit exactly two hex digits, e.g. "\x07" or "\xe9"
+        static char const hex_digits[] = "0123456789abcdef";
+        std::string result("\\x");
+        result += hex_digits[code >> 4];
+        result += hex_digits[code & 0x0f];
+        return result;
+    }
+
+    // Wide character overload: escaping is not implemented yet, the result is
+    // simply a one character string holding ch.
+    inline std::basic_string<wchar_t> get_charcode(wchar_t ch)
+    {
+        typedef std::basic_string<wchar_t> string_type;
+        return string_type(1, ch);
+    }
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Generate the body of the next_token function as hard coded control flow:
+    // every DFA state of sm_ becomes a label 'state<dfa>_<state>' followed by
+    // the character tests jumping to its successor states, the common exit is
+    // the label 'end'. Returns os_.good() once everything has been written.
+    // NOTE(review): the character sets are walked through 'char const*', so
+    // this presumably supports narrow character state machines only.
+    template <typename Char>
+    bool generate_function_body_switch(std::ostream & os_
+      , boost::lexer::basic_state_machine<Char> const &sm_)
+    {
+        std::size_t const lookups_ = sm_.data()._lookup->front ()->size ();
+        typename boost::lexer::basic_state_machine<Char>::iterator iter_ = sm_.begin();
+        std::size_t const dfas_ = sm_.data()._dfa->size ();
+
+        os_ << " static std::size_t const npos = "
+          "static_cast<std::size_t>(~0);\n";
+
+        os_ << "\n if (start_token_ == end_)\n";
+        os_ << " {\n";
+        os_ << " unique_id_ = npos;\n";
+        os_ << " return 0;\n";
+        os_ << " }\n\n";
+
+        if (sm_.data()._seen_BOL_assertion)
+            os_ << " bool bol = bol_;\n";
+
+        if (dfas_ > 1)
+            os_ << "again:\n";
+
+        os_ << " Iterator curr_ = start_token_;\n";
+        os_ << " bool end_state_ = false;\n";
+        os_ << " std::size_t id_ = npos;\n";
+        os_ << " std::size_t uid_ = npos;\n";
+
+        if (dfas_ > 1)
+            os_ << " std::size_t end_start_state_ = start_state_;\n";
+
+        if (sm_.data()._seen_BOL_assertion)
+        {
+            os_ << " bool end_bol_ = bol_;\n";
+        }
+
+        os_ << " Iterator end_token_ = start_token_;\n";
+        os_ << '\n';
+        // ch_ has to be declared ahead of the state dispatch emitted below: a
+        // goto must not jump past the initialization of a variable
+        os_ << " " << ((lookups_ == 256) ? "char" : "wchar_t")
+          << " ch_ = 0;\n\n";
+        if (dfas_ > 1)
+        {
+            os_ << " switch (start_state_)\n";
+            os_ << " {\n";
+
+            for (std::size_t i_ = 0; i_ < dfas_; ++i_)
+            {
+                os_ << " case " << i_ << ":\n";
+                os_ << " goto state" << i_ << "_0;\n";
+                os_ << " break;\n";
+            }
+
+            os_ << " default:\n";
+            os_ << " goto end;\n";
+            os_ << " break;\n";
+            os_ << " }\n\n";
+        }
+
+        for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
+        {
+            std::size_t const states_ = iter_->states;
+
+            for (std::size_t state_ = 0; state_ < states_; ++state_)
+            {
+                std::size_t const transitions_ = iter_->transitions;
+                std::size_t t_ = 0;
+
+                if (dfas_ > 1 || dfa_ != 0 || state_ != 0)
+                {
+                    os_ << "state" << dfa_ << '_' << state_ << ":\n";
+                }
+                if (iter_->end_state)
+                {
+                    os_ << " end_state_ = true;\n";
+                    os_ << " id_ = " << iter_->id << ";\n";
+                    os_ << " uid_ = " << iter_->unique_id << ";\n";
+                    os_ << " end_token_ = curr_;\n";
+
+                    if (dfas_ > 1)
+                    {
+                        os_ << " end_start_state_ = " << iter_->goto_dfa <<
+                          ";\n";
+                    }
+
+                    if (sm_.data()._seen_BOL_assertion)
+                    {
+                        os_ << " end_bol_ = bol;\n";
+                    }
+                }
+
+                os_ << " if (curr_ == end_) goto end;\n";
+                os_ << " ch_ = *curr_;\n";
+                if (iter_->bol_index != boost::lexer::npos)
+                {
+                    os_ << "\n if (bol) goto state" << dfa_ << '_'
+                      << iter_->bol_index << ";\n";
+                }
+                if (iter_->eol_index != boost::lexer::npos)
+                {
+                    os_ << "\n if (ch_ == '\\n') goto state" << dfa_
+                      << '_' << iter_->eol_index << ";\n";
+                }
+                os_ << " ++curr_;\n";
+
+                // one 'if (...) goto' per transition leaving this state
+                for (/**/; t_ < transitions_; ++t_)
+                {
+                    char const *ptr_ = iter_->token._charset.c_str();
+                    char const *charset_end_ = ptr_ + iter_->token._charset.size();
+                    char start_char_ = 0;
+                    char curr_char_ = 0;
+                    bool range_ = false;
+                    bool first_char_ = true;
+
+                    os_ << "\n if (";
+
+                    while (ptr_ != charset_end_)
+                    {
+                        curr_char_ = *ptr_++;
+                        // never treat the terminating NUL as part of a range
+                        if (ptr_ != charset_end_ && *ptr_ == curr_char_ + 1)
+                        {
+                            if (!range_)
+                            {
+                                start_char_ = curr_char_;
+                            }
+                            range_ = true;
+                        }
+                        else
+                        {
+                            if (!first_char_)
+                            {
+                                os_ << " || ";
+                            }
+
+                            first_char_ = false;
+
+                            if (range_)
+                            {
+                                if (iter_->token._negated)
+                                {
+                                    os_ << "!";
+                                }
+
+                                os_ << "(ch_ >= '" << get_charcode(start_char_);
+                                os_ << "' && ch_ <= '" << get_charcode(curr_char_) << "')";
+                                range_ = false;
+                            }
+                            else
+                            {
+                                os_ << "ch_ ";
+
+                                if (iter_->token._negated)
+                                {
+                                    os_ << "!=";
+                                }
+                                else
+                                {
+                                    os_ << "==";
+                                }
+
+                                os_ << " '" << get_charcode(curr_char_) << "'";
+                            }
+                        }
+                    }
+
+                    os_ << ") goto state" << dfa_ << '_' << iter_->goto_state
+                      << ";\n";
+                    ++iter_;
+                }
+
+                if (transitions_) os_ << '\n';
+
+                os_ << " goto end;\n";
+                os_ << '\n';
+
+                if (transitions_ == 0) ++iter_;
+            }
+        }
+
+        os_ << "end:\n";
+        os_ << " if (end_state_)\n";
+        os_ << " {\n";
+        os_ << " // return longest match\n";
+        os_ << " start_token_ = end_token_;\n";
+
+        if (dfas_ > 1)
+        {
+            os_ << " start_state_ = end_start_state_;\n";
+            os_ << "\n if (id_ == 0)\n";
+            os_ << " {\n";
+
+            if (sm_.data()._seen_BOL_assertion)
+            {
+                os_ << " bol = end_bol_;\n";
+            }
+
+            os_ << " goto again;\n";
+            os_ << " }\n";
+
+            if (sm_.data()._seen_BOL_assertion)
+            {
+                os_ << " else\n";
+                os_ << " {\n";
+                os_ << " bol_ = end_bol_;\n";
+                os_ << " }\n";
+            }
+        }
+        else if (sm_.data()._seen_BOL_assertion)
+        {
+            os_ << " bol_ = end_bol_;\n";
+        }
+
+        os_ << " }\n";
+        os_ << " else\n";
+        os_ << " {\n";
+
+        if (sm_.data()._seen_BOL_assertion)
+        {
+            os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n";
+        }
+        os_ << " id_ = npos;\n";
+        os_ << " uid_ = npos;\n";
+        os_ << " }\n\n";
+
+        os_ << " unique_id_ = uid_;\n";
+        os_ << " return id_;\n";
+        return os_.good();
+    }
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Generate a tokenizer for the given state machine: the include guarded
+    // file written to os_ holds the state tables, next_token and the accessor.
+    template <typename Char, typename F>
+    inline bool
+    generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_
+      , boost::lexer::basic_rules<Char> const& rules_
+      , std::ostream &os_, char const* name_suffix, F generate_function_body)
+    {
+        if (sm_.data()._lookup->empty())
+            return false;
+
+        std::size_t const dfas_ = sm_.data()._dfa->size();
+
+        os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
+        os_ << "// Copyright (c) 2008-2009 Hartmut Kaiser\n";
+        os_ << "//\n";
+        os_ << "// Distributed under the Boost Software License, "
+          "Version 1.0. (See accompanying\n";
+        os_ << "// file LICENSE_1_0.txt or copy at "
+          "http://www.boost.org/LICENSE_1_0.txt)\n\n";
+        os_ << "// Auto-generated by boost::lexer, do not edit\n\n";
+
+        std::string guard(name_suffix);
+        guard += name_suffix[0] ? "_" : "";
+        guard += __DATE__ "_" __TIME__;
+        std::string::size_type p = guard.find_first_of(": ");
+        while (std::string::npos != p)
+        {
+            guard.replace(p, 1, "_");
+            p = guard.find_first_of(": ", p);
+        }
+        boost::to_upper(guard);
+
+        os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n";
+        os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n";
+
+        os_ << "#include <boost/detail/iterator.hpp>\n";
+        os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n";
+
+        generate_delimiter(os_);
+        os_ << "// the generated table of state names and the tokenizer have to be\n"
+          "// defined in the boost::spirit::lex::lexertl::static_ namespace\n";
+        os_ << "namespace boost { namespace spirit { namespace lex { "
+          "namespace lexertl { namespace static_ {\n\n";
+
+        // generate the lexer state information variables
+        if (!generate_cpp_state_info(rules_, os_, name_suffix))
+            return false;
+
+        generate_delimiter(os_);
+        os_ << "// this function returns the next matched token\n";
+        os_ << "template<typename Iterator>\n";
+        os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "")
+          << name_suffix << " (";
+
+        if (dfas_ > 1)
+        {
+            os_ << "std::size_t& start_state_, ";
+        }
+        else
+        {
+            os_ << "std::size_t& /*start_state_*/, ";
+        }
+        if (sm_.data()._seen_BOL_assertion)
+        {
+            os_ << "bool& bol_, ";
+        }
+        else
+        {
+            os_ << "bool& /*bol_*/, ";
+        }
+        os_ << "\n ";
+
+        os_ << "Iterator &start_token_, Iterator const& end_, ";
+        os_ << "std::size_t& unique_id_)\n";
+        os_ << "{\n";
+        if (!generate_function_body(os_, sm_))
+            return false;
+        os_ << "}\n\n";
+
+        if (!generate_cpp_state_table(os_, name_suffix
+          , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion))
+        {
+            return false;
+        }
+
+        os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n";
+
+        os_ << "#endif\n";
+
+        return os_.good();
+    }
+
+ } // namespace detail
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Call lexer.init_dfa() and, if that succeeds, generate the static
+    // tokenizer for the lexer, with f writing the body of next_token.
+    template <typename Lexer, typename F>
+    inline bool
+    generate_static(Lexer const& lexer, std::ostream& os
+      , char const* name_suffix, F f)
+    {
+        return lexer.init_dfa() && detail::generate_cpp(
+            lexer.state_machine_, lexer.rules_, os, name_suffix, f);
+    }
+
+    ///////////////////////////////////////////////////////////////////////////
+    // deprecated: superseded by generate_static_dfa (see below), which does
+    // the same thing; this overload will be removed in the future.
+    template <typename Lexer>
+    inline bool generate_static(Lexer const& lexer, std::ostream& os
+      , char const* name_suffix = "")
+    {
+        typedef typename Lexer::char_type char_type;
+        return generate_static(lexer, os, name_suffix
+          , &detail::generate_function_body_dfa<char_type>);
+    }
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Generate a static tokenizer whose next_token body walks the DFA tables.
+    template <typename Lexer>
+    inline bool generate_static_dfa(Lexer const& lexer, std::ostream& os
+      , char const* name_suffix = "")
+    {
+        return generate_static(lexer, os, name_suffix
+          , &detail::generate_function_body_dfa<typename Lexer::char_type>);
+    }
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Generate a static tokenizer whose next_token body is hard coded gotos.
+    template <typename Lexer>
+    inline bool generate_static_switch(Lexer const& lexer, std::ostream& os
+      , char const* name_suffix = "")
+    {
+        return generate_static(lexer, os, name_suffix
+          , &detail::generate_function_body_switch<typename Lexer::char_type>);
+    }
+
+///////////////////////////////////////////////////////////////////////////////
+}}}}
+
+#endif


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk