Boost-Commit :
Subject: [Boost-commit] svn:boost r57529 - in trunk/boost/spirit/home: lex/lexer/lexertl support/detail/lexer
From: hartmut.kaiser_at_[hidden]
Date: 2009-11-09 21:00:28
Author: hkaiser
Date: 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
New Revision: 57529
URL: http://svn.boost.org/trac/boost/changeset/57529
Log:
Spirit: implemented forward iterator support for lexer
Text files modified:
trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp | 13 +
trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp | 122 ++++++++++++++------
trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp | 56 ++++++--
trunk/boost/spirit/home/lex/lexer/lexertl/static_functor_data.hpp | 16 +-
trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp | 11 +
trunk/boost/spirit/home/support/detail/lexer/generate_cpp.hpp | 236 ++++++++++++++++++++++-----------------
trunk/boost/spirit/home/support/detail/lexer/generator.hpp | 9 +
trunk/boost/spirit/home/support/detail/lexer/input.hpp | 19 ++-
8 files changed, 303 insertions(+), 179 deletions(-)
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -54,7 +54,8 @@
data (IterData const& data_, Iterator& first, Iterator const& last)
: first_(first), last_(last)
, state_machine_(data_.state_machine_)
- , rules_(data_.rules_) {}
+ , rules_(data_.rules_)
+ , bol_(data_.state_machine_.data()._seen_BOL_assertion) {}
// The following functions are used by the implementation of the
// placeholder '_state'.
@@ -88,7 +89,7 @@
return it;
}
- // The function more() is used by the implemention of the support
+ // The function more() is used by the implementation of the support
// function lex::more(). Its functionality is equivalent to flex'
// function yymore(): it tells the lexer that the next time it
// matches a rule, the corresponding token should be appended onto
@@ -130,7 +131,7 @@
std::size_t next(Iterator& end, std::size_t& unique_id)
{
typedef basic_iterator_tokeniser<Iterator> tokenizer;
- return tokenizer::next(state_machine_, first_, end, last_
+ return tokenizer::next(state_machine_, bol_, end, last_
, unique_id);
}
@@ -162,6 +163,8 @@
boost::lexer::basic_state_machine<char_type> const& state_machine_;
boost::lexer::basic_rules<char_type> const& rules_;
+ bool bol_; // helper storing whether last character was \n
+
private:
// silence MSVC warning C4512: assignment operator could not be generated
data& operator= (data const&);
@@ -223,7 +226,7 @@
{
typedef basic_iterator_tokeniser<Iterator> tokenizer;
return tokenizer::next(this->state_machine_, state_,
- this->get_first(), end, this->get_eoi(), unique_id);
+ this->bol_, end, this->get_eoi(), unique_id);
}
std::size_t& get_state() { return state_; }
@@ -290,7 +293,7 @@
return it;
}
- // The function more() is used by the implemention of the support
+ // The function more() is used by the implementation of the support
// function lex::more(). Its functionality is equivalent to flex'
// function yymore(): it tells the lexer that the next time it
// matches a rule, the corresponding token should be appended onto
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -89,7 +89,8 @@
}
inline bool
- generate_cpp_state_table (std::ostream &os_, char const* name_suffix = "")
+ generate_cpp_state_table (std::ostream &os_, char const* name_suffix
+ , bool bol, bool eol)
{
std::string suffix(name_suffix[0] ? "_" : "");
suffix += name_suffix;
@@ -97,9 +98,13 @@
generate_delimiter(os_);
os_ << "// this defines a generic accessors for the information above\n";
os_ << "struct lexer" << suffix << "\n{\n";
- os_ << " // version number of compatible static lexer engine\n";
- os_ << " enum { static_version = "
- << boost::lexical_cast<std::string>(SPIRIT_STATIC_LEXER_VERSION) << " };\n\n";
+ os_ << " // version number and feature-set of compatible static lexer engine\n";
+ os_ << " enum\n";
+ os_ << " {\n static_version = "
+ << boost::lexical_cast<std::string>(SPIRIT_STATIC_LEXER_VERSION) << ",\n";
+ os_ << " supports_bol = " << std::boolalpha << bol << ",\n";
+ os_ << " supports_eol = " << std::boolalpha << eol << "\n";
+ os_ << " };\n\n";
os_ << " // return the number of lexer states\n";
os_ << " static std::size_t const state_count()\n";
os_ << " {\n return lexer_state_count" << suffix << "; \n }\n\n";
@@ -108,10 +113,10 @@
os_ << " {\n return lexer_state_names" << suffix << "[idx]; \n }\n\n";
os_ << " // return the next matched token\n";
os_ << " template<typename Iterator>\n";
- os_ << " static std::size_t next(std::size_t &start_state_, Iterator const& start_\n";
+ os_ << " static std::size_t next(std::size_t &start_state_, bool& bol_\n";
os_ << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n";
os_ << " {\n return next_token" << suffix
- << "(start_state_, start_, start_token_, end_, unique_id_);\n }\n";
+ << "(start_state_, bol_, start_token_, end_, unique_id_);\n }\n";
os_ << "};\n\n";
return os_.good();
}
@@ -184,11 +189,11 @@
if (sm_.data()._seen_BOL_assertion)
{
- os_ << "Iterator const& start_, ";
+ os_ << "bool& bol_, ";
}
else if (!optimize_parameters)
{
- os_ << "Iterator const& /*start_*/, ";
+ os_ << "bool& /*bol_*/, ";
}
if (dfas_ > 1 || sm_.data()._seen_BOL_assertion || !optimize_parameters)
@@ -391,7 +396,15 @@
os_ << " };\n";
}
- os_ << "\n if (start_token_ == end_) return 0;\n\n";
+ os_ << "\n if (start_token_ == end_)\n";
+ os_ << " {\n";
+ os_ << " unique_id_ = boost::lexer::npos;\n";
+ os_ << " return 0;\n";
+ os_ << " }\n\n";
+ if (sm_.data()._seen_BOL_assertion)
+ {
+ os_ << " bool bol = bol_;\n\n";
+ }
if (dfas_ > 1)
{
@@ -401,11 +414,19 @@
os_ << " const std::size_t *dfa_ = dfa_arr_[start_state_];\n";
}
- os_ << " const std::size_t *ptr_ = dfa_ + dfa_alphabet_;\n";
+ os_ << " std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n";
os_ << " Iterator curr_ = start_token_;\n";
os_ << " bool end_state_ = *ptr_ != 0;\n";
os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
+ if (dfas_ > 1)
+ {
+ os_ << " std::size_t end_start_state_ = start_state_;\n";
+ }
+ if (sm_.data()._seen_BOL_assertion)
+ {
+ os_ << " bool end_bol_ = bol_;\n";
+ }
os_ << " Iterator end_token_ = start_token_;\n\n";
os_ << " while (curr_ != end_)\n";
@@ -423,8 +444,7 @@
if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
{
- os_ << " if (BOL_state_ && (start_token_ == start_ ||\n";
- os_ << " *(start_token_ - 1) == '\\n'))\n";
+ os_ << " if (BOL_state_ && bol)\n";
os_ << " {\n";
os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
os_ << " }\n";
@@ -434,17 +454,18 @@
os_ << " }\n";
os_ << " else\n";
os_ << " {\n";
- os_ << " std::size_t const state_ =\n";
-
if (lookups_ == 256)
{
- os_ << " ptr_[lookup_[<typename Traits::index_type>"
- "(*curr_++)]];\n";
+ os_ << " unsigned char index = \n";
+ os_ << " static_cast<unsigned char>(*curr_++);\n";
}
else
{
- os_ << " ptr_[lookup_[*curr_++]];\n";
+ os_ << " std::size_t index = *curr_++\n";
}
+ os_ << " bol = (index == '\n') ? true : false;\n";
+ os_ << " std::size_t const state_ = ptr_[\n";
+ os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
os_ << '\n';
os_ << " if (state_ == 0) break;\n";
@@ -454,24 +475,24 @@
}
else if (sm_.data()._seen_BOL_assertion)
{
- os_ << " if (BOL_state_ && (start_token_ == start_ ||\n";
- os_ << " *(start_token_ - 1) == '\\n'))\n";
+ os_ << " if (BOL_state_ && bol)\n";
os_ << " {\n";
os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
os_ << " }\n";
os_ << " else\n";
os_ << " {\n";
- os_ << " std::size_t const state_ =\n";
-
if (lookups_ == 256)
{
- os_ << " ptr_[lookup_[static_cast<unsigned char>"
- "(*curr_++)]];\n";
+ os_ << " unsigned char index = \n";
+ os_ << " static_cast<unsigned char>(*curr_++);\n";
}
else
{
- os_ << " ptr_[lookup_[*curr_++]];\n";
+ os_ << " std::size_t index = *curr_++\n";
}
+ os_ << " bol = (index == '\n') ? true : false;\n";
+ os_ << " std::size_t const state_ = ptr_[\n";
+ os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
os_ << '\n';
os_ << " if (state_ == 0) break;\n";
@@ -487,17 +508,18 @@
os_ << " }\n";
os_ << " else\n";
os_ << " {\n";
- os_ << " std::size_t const state_ =\n";
-
if (lookups_ == 256)
{
- os_ << " ptr_[lookup_[static_cast<unsigned char>"
- "(*curr_++)]];\n";
+ os_ << " unsigned char index = \n";
+ os_ << " static_cast<unsigned char>(*curr_++);\n";
}
else
{
- os_ << " ptr_[lookup_[*curr_++]];\n";
+ os_ << " std::size_t index = *curr_++\n";
}
+ os_ << " bol = (index == '\n') ? true : false;\n";
+ os_ << " std::size_t const state_ = ptr_[\n";
+ os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
os_ << '\n';
os_ << " if (state_ == 0) break;\n";
@@ -530,12 +552,14 @@
os_ << " end_state_ = true;\n";
os_ << " id_ = *(ptr_ + id_index);\n";
os_ << " uid_ = *(ptr_ + unique_id_index);\n";
-
if (dfas_ > 1)
{
- os_ << " start_state_ = *(ptr_ + state_index);\n";
+ os_ << " end_start_state_ = *(ptr_ + state_index);\n";
+ }
+ if (sm_.data()._seen_BOL_assertion)
+ {
+ os_ << " end_bol_ = bol;\n";
}
-
os_ << " end_token_ = curr_;\n";
os_ << " }\n";
os_ << " }\n\n";
@@ -553,12 +577,14 @@
os_ << " end_state_ = true;\n";
os_ << " id_ = *(ptr_ + id_index);\n";
os_ << " uid_ = *(ptr_ + unique_id_index);\n";
-
if (dfas_ > 1)
{
- os_ << " start_state_ = *(ptr_ + state_index);\n";
+ os_ << " end_start_state_ = *(ptr_ + state_index);\n";
+ }
+ if (sm_.data()._seen_BOL_assertion)
+ {
+ os_ << " end_bol_ = bol;\n";
}
-
os_ << " end_token_ = curr_;\n";
os_ << " }\n";
os_ << " }\n\n";
@@ -567,17 +593,36 @@
os_ << " if (end_state_)\n";
os_ << " {\n";
os_ << " // return longest match\n";
+ os_ << " start_state_ = end_start_state_;\n";
os_ << " start_token_ = end_token_;\n";
if (dfas_ > 1)
{
- os_ << " if (id_ == 0) goto again;\n";
+ os_ << " if (id_ == 0)\n";
+ os_ << " {\n";
+ if (sm_.data()._seen_BOL_assertion)
+ {
+ os_ << " bol_ = end_bol_;\n";
+ }
+ os_ << " goto again;\n";
+ os_ << " }\n";
+ if (sm_.data()._seen_BOL_assertion)
+ {
+ os_ << " else\n";
+ os_ << " {\n";
+ os_ << " bol_ = end_bol_;\n";
+ os_ << " }\n";
+ }
}
os_ << " }\n";
os_ << " else\n";
os_ << " {\n";
+ if (sm_.data()._seen_BOL_assertion)
+ {
+ os_ << " bol_ = (*start_token_ == '\n') ? true : false;\n";
+ }
if (skip_on_nomatch)
{
os_ << " // No match causes char to be skipped\n";
@@ -592,8 +637,11 @@
os_ << " return id_;\n";
os_ << "}\n\n";
- if (!generate_cpp_state_table(os_, name_suffix))
+ if (!generate_cpp_state_table(os_, name_suffix
+ , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion))
+ {
return false;
+ }
os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n";
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -30,9 +30,8 @@
static std::size_t next (
boost::lexer::basic_state_machine<char_type> const& state_machine_
- , std::size_t &dfa_state_, Iterator const& start_
- , Iterator &start_token_, Iterator const& end_
- , std::size_t& unique_id_)
+ , std::size_t &dfa_state_, bool& bol_, Iterator &start_token_
+ , Iterator const& end_, std::size_t& unique_id_)
{
if (start_token_ == end_)
{
@@ -40,16 +39,21 @@
return 0;
}
+ bool bol = bol_;
+
again:
std::size_t const* lookup_ = &state_machine_.data()._lookup[dfa_state_]->
front ();
std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[dfa_state_];
std::size_t const* dfa_ = &state_machine_.data()._dfa[dfa_state_]->front ();
+
std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
Iterator curr_ = start_token_;
bool end_state_ = *ptr_ != 0;
std::size_t id_ = *(ptr_ + boost::lexer::id_index);
std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
+ std::size_t end_start_state_ = dfa_state_;
+ bool end_bol_ = bol_;
Iterator end_token_ = start_token_;
while (curr_ != end_)
@@ -57,8 +61,7 @@
std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
- if (BOL_state_ && (start_token_ == start_ ||
- *(start_token_ - 1) == '\n'))
+ if (BOL_state_ && bol)
{
ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
}
@@ -77,6 +80,7 @@
index_type index =
boost::lexer::char_traits<value_type>::call(*curr_++);
+ bol = (index == '\n') ? true : false;
std::size_t const state_ = ptr_[
lookup_[static_cast<std::size_t>(index)]];
@@ -93,7 +97,8 @@
end_state_ = true;
id_ = *(ptr_ + boost::lexer::id_index);
uid_ = *(ptr_ + boost::lexer::unique_id_index);
- dfa_state_ = *(ptr_ + boost::lexer::state_index);
+ end_start_state_ = *(ptr_ + boost::lexer::state_index);
+ end_bol_ = bol;
end_token_ = curr_;
}
}
@@ -109,19 +114,29 @@
end_state_ = true;
id_ = *(ptr_ + boost::lexer::id_index);
uid_ = *(ptr_ + boost::lexer::unique_id_index);
- dfa_state_ = *(ptr_ + boost::lexer::state_index);
+ end_start_state_ = *(ptr_ + boost::lexer::state_index);
+ end_bol_ = bol;
end_token_ = curr_;
}
}
if (end_state_) {
// return longest match
+ dfa_state_ = end_start_state_;
start_token_ = end_token_;
- if (id_ == 0)
+ if (id_ == 0)
+ {
+ bol = end_bol_;
goto again;
+ }
+ else
+ {
+ bol_ = end_bol_;
+ }
}
else {
+ bol_ = (*start_token_ == '\n') ? true : false;
id_ = boost::lexer::npos;
uid_ = boost::lexer::npos;
}
@@ -133,19 +148,26 @@
///////////////////////////////////////////////////////////////////////
static std::size_t next (
boost::lexer::basic_state_machine<char_type> const& state_machine_
- , Iterator const& start_, Iterator &start_token_, Iterator const& end_
+ , bool& bol_, Iterator &start_token_, Iterator const& end_
, std::size_t& unique_id_)
{
- if (start_token_ == end_) return 0;
+ if (start_token_ == end_)
+ {
+ unique_id_ = boost::lexer::npos;
+ return 0;
+ }
+ bool bol = bol_;
std::size_t const* lookup_ = &state_machine_.data()._lookup[0]->front();
std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[0];
std::size_t const* dfa_ = &state_machine_.data()._dfa[0]->front ();
std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
+
Iterator curr_ = start_token_;
bool end_state_ = *ptr_ != 0;
std::size_t id_ = *(ptr_ + boost::lexer::id_index);
std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
+ bool end_bol_ = bol_;
Iterator end_token_ = start_token_;
while (curr_ != end_)
@@ -153,8 +175,7 @@
std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
- if (BOL_state_ && (start_token_ == start_ ||
- *(start_token_ - 1) == '\n'))
+ if (BOL_state_ && bol)
{
ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
}
@@ -170,9 +191,10 @@
typedef typename
boost::lexer::char_traits<value_type>::index_type
index_type;
-
+
index_type index =
boost::lexer::char_traits<value_type>::call(*curr_++);
+ bol = (index == '\n') ? true : false;
std::size_t const state_ = ptr_[
lookup_[static_cast<std::size_t>(index)]];
@@ -189,6 +211,7 @@
end_state_ = true;
id_ = *(ptr_ + boost::lexer::id_index);
uid_ = *(ptr_ + boost::lexer::unique_id_index);
+ end_bol_ = bol;
end_token_ = curr_;
}
}
@@ -204,15 +227,18 @@
end_state_ = true;
id_ = *(ptr_ + boost::lexer::id_index);
uid_ = *(ptr_ + boost::lexer::unique_id_index);
+ end_bol_ = bol;
end_token_ = curr_;
}
}
if (end_state_) {
// return longest match
+ bol_ = end_bol_;
start_token_ = end_token_;
}
else {
+ bol_ = *start_token_ == '\n';
id_ = boost::lexer::npos;
uid_ = boost::lexer::npos;
}
@@ -222,10 +248,6 @@
}
};
- ///////////////////////////////////////////////////////////////////////////
- typedef basic_iterator_tokeniser<char const *> tokeniser;
- typedef basic_iterator_tokeniser<wchar_t const *> wtokeniser;
-
}}}}
#endif
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/static_functor_data.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/static_functor_data.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/static_functor_data.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -61,7 +61,8 @@
, std::size_t> wrap_action_type;
typedef std::size_t (*next_token_functor)(std::size_t&,
- Iterator const&, Iterator&, Iterator const&, std::size_t&);
+as Iterator const&, Iterator&, Iterator const&, std::size_t&);
+ bool&, Iterator&, Iterator const&, std::size_t&);
typedef char_type const* (*get_state_name_type)(std::size_t);
// initialize the shared data
@@ -70,7 +71,8 @@
, Iterator const& last)
: first_(first), last_(last)
, next_token_(data.next_)
- , get_state_name_(data.get_state_name_){}
+ , get_state_name_(data.get_state_name_)
+ , bol_(data.bol_) {}
// The following functions are used by the implementation of the
// placeholder '_state'.
@@ -107,7 +109,7 @@
return it;
}
- // The function more() is used by the implemention of the support
+ // The function more() is used by the implementation of the support
// function lex::more(). Its functionality is equivalent to flex'
// function yymore(): it tells the lexer that the next time it
// matches a rule, the corresponding token should be appended onto
@@ -149,7 +151,7 @@
std::size_t next(Iterator& end, std::size_t& unique_id)
{
std::size_t state;
- return next_token_(state, first_, end, last_, unique_id);
+ return next_token_(state, bol_, end, last_, unique_id);
}
// nothing to invoke, so this is empty
@@ -180,6 +182,8 @@
next_token_functor next_token_;
get_state_name_type get_state_name_;
+ bool bol_;
+
private:
// silence MSVC warning C4512: assignment operator could not be generated
static_data& operator= (static_data const&);
@@ -242,7 +246,7 @@
// underlying input sequence.
std::size_t next(Iterator& end, std::size_t& unique_id)
{
- return this->next_token_(state_, this->first_, end, this->last_
+ return this->next_token_(state_, this->bol_, end, this->last_
, unique_id);
}
@@ -312,7 +316,7 @@
return it;
}
- // The function more() is used by the implemention of the support
+ // The function more() is used by the implementation of the support
// function lex::more(). Its functionality is equivalent to flex'
// function yymore(): it tells the lexer that the next time it
// matches a rule, the corresponding token should be appended onto
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -139,15 +139,17 @@
iterator_data_type(next_token_functor next
, semantic_actions_type const& actions
- , get_state_name_type get_state_name, std::size_t num_states)
+ , get_state_name_type get_state_name, std::size_t num_states
+ , bool bol)
: next_(next), actions_(actions), get_state_name_(get_state_name)
- , num_states_(num_states)
+ , num_states_(num_states), bol_(bol)
{}
next_token_functor next_;
semantic_actions_type const& actions_;
get_state_name_type get_state_name_;
std::size_t num_states_;
+ bool bol_;
private:
// silence MSVC warning C4512: assignment operator could not be generated
@@ -173,8 +175,9 @@
, char_type const* initial_state = 0) const
{
iterator_data_type iterator_data(
- &tables_type::template next<Iterator_>, actions_,
- &tables_type::state_name, tables_type::state_count()
+ &tables_type::template next<Iterator_>, actions_
+ , &tables_type::state_name, tables_type::state_count()
+ , tables_type::supports_bol
);
return iterator_type(iterator_data, first, last, initial_state);
}
Modified: trunk/boost/spirit/home/support/detail/lexer/generate_cpp.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/generate_cpp.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/generate_cpp.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -79,7 +79,7 @@
upper_name_.begin (), ::toupper);
os_ << "#ifndef " << upper_name_ + '\n';
os_ << "#define " << upper_name_ + '\n';
- os_ << "// Copyright (c) 2008 Ben Hanson\n";
+ os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
os_ << "//\n";
os_ << "// Distributed under the Boost Software License, "
"Version 1.0. (See accompanying\n";
@@ -94,25 +94,6 @@
os_ << "std::size_t &start_state_, ";
}
- if (sm_._seen_BOL_assertion || !optimise_parameters_)
- {
- if (use_pointers_)
- {
- os_ << iterator_ << " const ";
- }
- else
- {
- os_ << "const " << iterator_;
- }
-
- os_ << "start_, ";
- }
-
- if (dfas_ > 1 || sm_._seen_BOL_assertion || !optimise_parameters_)
- {
- os_ << "\n ";
- }
-
if (use_pointers_)
{
os_ << iterator_ << " &";
@@ -133,11 +114,18 @@
os_ << "const " << iterator_;
}
- os_ << "end_)\n";
+ os_ << "end_, \n";
+ os_ << " std::size_t &unique_id_";
+
+ if (sm_._seen_BOL_assertion || !optimise_parameters_)
+ {
+ os_ << ", bool &beg_of_line_";
+ }
+
+ os_ << ")\n";
os_ << "{\n";
- os_ << " enum {end_state_index, id_index, state_index, bol_index, "
- "eol_index,\n";
- os_ << " dead_state_index, dfa_offset};\n";
+ os_ << " enum {end_state_index, id_index, unique_id_index, state_index, bol_index,\n";
+ os_ << " eol_index, dead_state_index, dfa_offset};\n";
os_ << " static const std::size_t npos = static_cast"
"<std::size_t>(~0);\n";
@@ -330,7 +318,11 @@
os_ << "};\n";
}
- os_ << "\n if (start_token_ == end_) return 0;\n\n";
+ os_ << "\n if (start_token_ == end_)\n";
+ os_ << " {\n";
+ os_ << " unique_id_ = npos;\n";
+ os_ << " return 0;\n";
+ os_ << " }\n\n";
if (dfas_ > 1)
{
@@ -346,6 +338,19 @@
os_ << " Iterator curr_ = start_token_;\n";
os_ << " bool end_state_ = *ptr_ != 0;\n";
os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
+ os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
+
+ if (dfas_ > 1)
+ {
+ os_ << " std::size_t end_start_state_ = start_state_;\n";
+ }
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << " bool bol_ = beg_of_line_;\n";
+ os_ << " bool end_bol_ = bol_;\n";
+ }
+
os_ << " Iterator end_token_ = start_token_;\n";
os_ << '\n';
os_ << " while (curr_ != end_)\n";
@@ -366,108 +371,89 @@
os_ << '\n';
}
- if (sm_._seen_BOL_assertion && sm_._seen_EOL_assertion)
+ if (sm_._seen_BOL_assertion)
{
- os_ << " if (BOL_state_ && (start_token_ == start_ ||\n";
- os_ << " *(start_token_ - 1) == '\\n'))\n";
+ os_ << " if (BOL_state_ && bol_)\n";
os_ << " {\n";
os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
os_ << " }\n";
- os_ << " else if (EOL_state_ && *curr_ == '\\n')\n";
- os_ << " {\n";
- os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
- os_ << " }\n";
- os_ << " else\n";
- os_ << " {\n";
- os_ << " const std::size_t state_ =\n";
+ }
- if (lookups_ == 256)
- {
- os_ << " ptr_[lookup_[static_cast<unsigned char>\n";
- os_ << " (*curr_++)]];\n";
- }
- else
+ if (sm_._seen_EOL_assertion)
+ {
+ os_ << " ";
+
+ if (sm_._seen_BOL_assertion)
{
- os_ << " ptr_[lookup_[*curr_++]];\n";
+ os_ << "else ";
}
- os_ << '\n';
- os_ << " if (state_ == 0) break;\n";
- os_ << '\n';
- os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+ os_ << "if (EOL_state_ && *curr_ == '\\n')\n";
+ os_ << " {\n";
+ os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
os_ << " }\n";
}
- else if (sm_._seen_BOL_assertion)
+
+ std::string tab_ (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion ? " " : "");
+
+ if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
{
- os_ << " if (BOL_state_ && (start_token_ == start_ ||\n";
- os_ << " *(start_token_ - 1) == '\\n'))\n";
- os_ << " {\n";
- os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
- os_ << " }\n";
os_ << " else\n";
os_ << " {\n";
- os_ << " const std::size_t state_ =\n";
+ }
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << " ";
if (lookups_ == 256)
{
- os_ << " ptr_[lookup_[static_cast<unsigned char>\n";
- os_ << " (*curr_++)]];\n";
+ os_ << "char";
}
else
{
- os_ << " ptr_[lookup_[*curr_++]];\n";
+ os_ << "wchar_t";
}
- os_ << '\n';
- os_ << " if (state_ == 0) break;\n";
- os_ << '\n';
- os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
- os_ << " }\n";
+ os_ << " prev_char_ = *curr_++;\n\n";
+ os_ << " bol_ = prev_char_ == '\\n';\n\n";
}
- else if (sm_._seen_EOL_assertion)
- {
- os_ << " if (EOL_state_ && *curr_ == '\\n')\n";
- os_ << " {\n";
- os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
- os_ << " }\n";
- os_ << " else\n";
- os_ << " {\n";
- os_ << " const std::size_t state_ =\n";
- if (lookups_ == 256)
- {
- os_ << " ptr_[lookup_[static_cast<unsigned char>\n";
- os_ << " (*curr_++)]];\n";
- }
- else
- {
- os_ << " ptr_[lookup_[*curr_++]];\n";
- }
+ os_ << tab_;
+ os_ << " const std::size_t state_ =\n";
+ os_ << tab_;
+ os_ << " ptr_[lookup_[";
- os_ << '\n';
- os_ << " if (state_ == 0) break;\n";
- os_ << '\n';
- os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
- os_ << " }\n";
+ if (lookups_ == 256)
+ {
+ os_ << "static_cast<unsigned char>(";
+ }
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << "prev_char";
}
else
{
- os_ << " const std::size_t state_ =\n";
+ os_ << "*curr_++";
+ }
- if (lookups_ == 256)
- {
- os_ << " ptr_[lookup_[static_cast<unsigned char>\n";
- os_ << " (*curr_++)]];\n";
- }
- else
- {
- os_ << " ptr_[lookup_[*curr_++]];\n";
- }
- os_ << '\n';
- os_ << " if (state_ == 0) break;\n";
- os_ << '\n';
- os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+ if (lookups_ == 256)
+ {
+ os_ << ')';
+ }
+
+ os_ << "]];\n\n";
+
+ os_ << tab_;
+ os_ << " if (state_ == 0) break;\n\n";
+ os_ << tab_;
+ os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+
+ if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
+ {
+ os_ << " }\n";
}
os_ << '\n';
@@ -475,10 +461,16 @@
os_ << " {\n";
os_ << " end_state_ = true;\n";
os_ << " id_ = *(ptr_ + id_index);\n";
+ os_ << " uid_ = *(ptr_ + unique_id_index);\n";
if (dfas_ > 1)
{
- os_ << " start_state_ = *(ptr_ + state_index);\n";
+ os_ << " end_start_state_ = *(ptr_ + state_index);\n";
+ }
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << " end_bol_ = bol_;\n";
}
os_ << " end_token_ = curr_;\n";
@@ -498,10 +490,16 @@
os_ << " {\n";
os_ << " end_state_ = true;\n";
os_ << " id_ = *(ptr_ + id_index);\n";
+ os_ << " uid_ = *(ptr_ + unique_id_index);\n";
if (dfas_ > 1)
{
- os_ << " start_state_ = *(ptr_ + state_index);\n";
+ os_ << " end_start_state_ = *(ptr_ + state_index);\n";
+ }
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << " end_bol_ = bol_;\n";
}
os_ << " end_token_ = curr_;\n";
@@ -513,12 +511,40 @@
os_ << " if (end_state_)\n";
os_ << " {\n";
os_ << " // return longest match\n";
+
+ if (dfas_ > 1)
+ {
+ os_ << " start_state_ = end_start_state_;\n";
+ }
+
+ if (sm_._seen_BOL_assertion && dfas_ < 2)
+ {
+ os_ << " beg_of_line_ = end_bol_;\n";
+ }
+
os_ << " start_token_ = end_token_;\n";
if (dfas_ > 1)
{
os_ << '\n';
- os_ << " if (id_ == 0) goto again;\n";
+ os_ << " if (id_ == 0)\n";
+ os_ << " {\n";
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << " bol_ = end_bol_;\n";
+ }
+
+ os_ << " goto again;\n";
+ os_ << " }\n";
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << " else\n";
+ os_ << " {\n";
+ os_ << " beg_of_line_ = end_bol_;\n";
+ os_ << " }\n";
+ }
}
os_ << " }\n";
@@ -528,12 +554,20 @@
if (skip_unknown_)
{
os_ << " // No match causes char to be skipped\n";
+
+ if (sm_._seen_BOL_assertion)
+ {
+ os_ << " beg_of_line_ = *start_token_ == '\\n';\n";
+ }
+
os_ << " ++start_token_;\n";
}
os_ << " id_ = npos;\n";
+ os_ << " uid_ = npos;\n";
os_ << " }\n";
os_ << '\n';
+ os_ << " unique_id_ = uid_;\n";
os_ << " return id_;\n";
os_ << "}\n";
os_ << "\n#endif\n";
Modified: trunk/boost/spirit/home/support/detail/lexer/generator.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/generator.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/generator.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -285,7 +285,8 @@
locale_, node_ptr_vector_, macromap_, token_map_,
seen_BOL_assertion_, seen_EOL_assertion_);
macro_iter_pair map_iter_ = macromap_.
- insert (macro_pair (name_, (detail::node const*)0));
+ insert (macro_pair (name_, static_cast<const detail::node *>
+ (0)));
map_iter_.first->second = node_;
}
@@ -511,7 +512,8 @@
}
else
{
- iter_ = lhs_->insert (++iter_, (charset*)0);
+ iter_ = lhs_->insert (++iter_,
+ static_cast<charset *>(0));
*iter_ = overlap_.release ();
// VC++ 6 Hack:
@@ -653,7 +655,8 @@
}
else
{
- iter_ = lhs_->insert (++iter_, (equivset*)0);
+ iter_ = lhs_->insert (++iter_,
+ static_cast<equivset *>(0));
*iter_ = overlap_.release ();
// VC++ 6 Hack:
Modified: trunk/boost/spirit/home/support/detail/lexer/input.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/input.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/input.hpp 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -172,6 +172,7 @@
bool end_state_ = *ptr_ != 0;
std::size_t id_ = *(ptr_ + id_index);
std::size_t uid_ = *(ptr_ + unique_id_index);
+ std::size_t end_start_state_ = start_state_;
bool end_bol_ = bol_;
FwdIter end_token_ = start_token_;
@@ -211,7 +212,7 @@
end_state_ = true;
id_ = *(ptr_ + id_index);
uid_ = *(ptr_ + unique_id_index);
- start_state_ = *(ptr_ + state_index);
+ end_start_state_ = *(ptr_ + state_index);
end_bol_ = bol_;
end_token_ = curr_;
}
@@ -228,7 +229,7 @@
end_state_ = true;
id_ = *(ptr_ + id_index);
uid_ = *(ptr_ + unique_id_index);
- start_state_ = *(ptr_ + state_index);
+ end_start_state_ = *(ptr_ + state_index);
end_bol_ = bol_;
end_token_ = curr_;
}
@@ -237,14 +238,18 @@
if (end_state_)
{
// return longest match
- _data.bol = end_bol_;
+ start_state_ = end_start_state_;
start_token_ = end_token_;
if (id_ == 0)
{
- bol_ = _data.bol;
+ bol_ = end_bol_;
goto again;
}
+ else
+ {
+ _data.bol = end_bol_;
+ }
}
else
{
@@ -279,6 +284,7 @@
bool end_state_ = *ptr_ != 0;
std::size_t id_ = *(ptr_ + id_index);
std::size_t uid_ = *(ptr_ + unique_id_index);
+ std::size_t end_start_state_ = start_state_;
FwdIter end_token_ = start_token_;
while (curr_ != end_)
@@ -298,7 +304,7 @@
end_state_ = true;
id_ = *(ptr_ + id_index);
uid_ = *(ptr_ + unique_id_index);
- start_state_ = *(ptr_ + state_index);
+ end_start_state_ = *(ptr_ + state_index);
end_token_ = curr_;
}
}
@@ -306,6 +312,7 @@
if (end_state_)
{
// return longest match
+ start_state_ = end_start_state_;
start_token_ = end_token_;
if (id_ == 0) goto again;
@@ -401,8 +408,8 @@
if (end_state_)
{
// return longest match
- start_token_ = end_token_;
_data.bol = end_bol_;
+ start_token_ = end_token_;
}
else
{
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk