Boost-Commit:
Subject: [Boost-commit] svn:boost r53178 - in trunk: boost/spirit/home/lex boost/spirit/home/lex/detail boost/spirit/home/lex/lexer boost/spirit/home/lex/lexer/lexertl boost/spirit/home/support/detail/lexer boost/spirit/home/support/detail/lexer/conversion boost/spirit/home/support/detail/lexer/parser boost/spirit/home/support/detail/lexer/parser/tree libs/spirit/example/lex/static_lexer
From: hartmut.kaiser_at_[hidden]
Date: 2009-05-22 12:34:32
Author: hkaiser
Date: 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
New Revision: 53178
URL: http://svn.boost.org/trac/boost/changeset/53178
Log:
Spirit: Updated Spirit.Lex to support unique ids provided by the underlying lexertl implementation. Speeds up things quite a bit...
Text files modified:
trunk/boost/spirit/home/lex/detail/sequence_function.hpp | 22 +++++
trunk/boost/spirit/home/lex/lexer/action.hpp | 7 ++
trunk/boost/spirit/home/lex/lexer/char_token_def.hpp | 9 +
trunk/boost/spirit/home/lex/lexer/lexer.hpp | 12 +++
trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp | 128 +++++++++++++++------------------------
trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp | 111 +++++++++++++++++-----------------
trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp | 88 ++++-----------------------
trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp | 89 +++++++++++++++------------
trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp | 76 ++++++++++++-----------
trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp | 9 ++
trunk/boost/spirit/home/lex/lexer/sequence.hpp | 11 ++
trunk/boost/spirit/home/lex/lexer/string_token_def.hpp | 3
trunk/boost/spirit/home/lex/lexer/token_def.hpp | 7 +
trunk/boost/spirit/home/lex/lexer/token_set.hpp | 36 +++++++---
trunk/boost/spirit/home/lex/reference.hpp | 8 ++
trunk/boost/spirit/home/support/detail/lexer/consts.hpp | 8 +-
trunk/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp | 2
trunk/boost/spirit/home/support/detail/lexer/debug.hpp | 12 +++
trunk/boost/spirit/home/support/detail/lexer/file_input.hpp | 31 +++++++--
trunk/boost/spirit/home/support/detail/lexer/generator.hpp | 27 +++++---
trunk/boost/spirit/home/support/detail/lexer/input.hpp | 70 +++++++++++++++++----
trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp | 10 +-
trunk/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp | 10 ++
trunk/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp | 5 +
trunk/boost/spirit/home/support/detail/lexer/rules.hpp | 127 +++++++++++++++++++++++++++++++++------
trunk/boost/spirit/home/support/detail/lexer/state_machine.hpp | 6 +
trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp | 101 +++++++++++++++++--------------
27 files changed, 607 insertions(+), 418 deletions(-)
Modified: trunk/boost/spirit/home/lex/detail/sequence_function.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/detail/sequence_function.hpp (original)
+++ trunk/boost/spirit/home/lex/detail/sequence_function.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -16,22 +16,38 @@
namespace boost { namespace spirit { namespace lex { namespace detail
{
template <typename LexerDef, typename String>
- struct sequence_function
+ struct sequence_collect_function
{
- sequence_function(LexerDef& def_, String const& state_)
+ sequence_collect_function(LexerDef& def_, String const& state_)
: def(def_), state(state_) {}
template <typename Component>
bool operator()(Component const& component) const
{
component.collect(def, state);
- return false; // execute for all sequence elements
+ return false; // execute for all sequence elements
}
LexerDef& def;
String const& state;
};
+ template <typename LexerDef>
+ struct sequence_add_actions_function
+ {
+ sequence_add_actions_function(LexerDef& def_)
+ : def(def_) {}
+
+ template <typename Component>
+ bool operator()(Component const& component) const
+ {
+ component.add_actions(def);
+ return false; // execute for all sequence elements
+ }
+
+ LexerDef& def;
+ };
+
}}}}
#endif
Modified: trunk/boost/spirit/home/lex/lexer/action.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/action.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/action.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -32,6 +32,13 @@
// first collect the token definition information for the token_def
// this action is attached to
subject.collect(lexdef, state);
+ }
+
+ template <typename LexerDef>
+ void add_actions(LexerDef& lexdef) const
+ {
+ // call to add all actions attached further down the hierarchy
+ subject.add_actions(lexdef);
// retrieve the id of the associated token_def and register the
// given semantic action with the lexer instance
Modified: trunk/boost/spirit/home/lex/lexer/char_token_def.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/char_token_def.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/char_token_def.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -61,18 +61,21 @@
{
typedef typename CharEncoding::char_type char_type;
- char_token_def(char_type ch) : ch (ch) {}
+ char_token_def(char_type ch) : ch(ch), unique_id_(~0) {}
template <typename LexerDef, typename String>
void collect(LexerDef& lexdef, String const& state) const
{
- lexdef.add_token (state.c_str(), ch,
- static_cast<std::size_t>(ch));
+ lexdef.add_token (state.c_str(), ch, static_cast<std::size_t>(ch));
}
+ template <typename LexerDef>
+ void add_actions(LexerDef& lexdef) const {}
+
std::size_t id() const { return static_cast<std::size_t>(ch); }
char_type ch;
+ mutable std::size_t unique_id_;
};
///////////////////////////////////////////////////////////////////////////
Modified: trunk/boost/spirit/home/lex/lexer/lexer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexer.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexer.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -197,12 +197,22 @@
};
friend struct pattern_adder;
+ private:
+ // Helper function to invoke the necessary 2 step compilation
+ // process on token definition expressions
+ template <typename TokenExpr>
+ void compile2pass(TokenExpr const& expr)
+ {
+ expr.collect(def, state);
+ expr.add_actions(def);
+ }
+
public:
///////////////////////////////////////////////////////////////////
template <typename Expr>
void define(Expr const& expr)
{
- compile<lex::domain>(expr).collect(def, state);
+ compile2pass(compile<lex::domain>(expr));
}
lexer_def_(LexerDef& def_, string_type const& state_)
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -57,14 +57,15 @@
, rules(data_.rules_)
, first(first_), last(last_) {}
- std::size_t next(Iterator& end)
+ std::size_t next(Iterator& end, std::size_t& unique_id)
{
typedef basic_iterator_tokeniser<Iterator> tokenizer;
- return tokenizer::next(state_machine, first, end, last);
+ return tokenizer::next(state_machine, first, end, last, unique_id);
}
// nothing to invoke, so this is empty
- bool invoke_actions(std::size_t, std::size_t, Iterator const&)
+ bool invoke_actions(std::size_t, std::size_t, std::size_t
+ , Iterator const&)
{
return true; // always accept
}
@@ -96,11 +97,11 @@
data (IterData const& data_, Iterator& first_, Iterator const& last_)
: base_type(data_, first_, last_), state(0) {}
- std::size_t next(Iterator& end)
+ std::size_t next(Iterator& end, std::size_t& unique_id)
{
typedef basic_iterator_tokeniser<Iterator> tokenizer;
return tokenizer::next(this->state_machine, state,
- this->first, end, this->last);
+ this->first, end, this->last, unique_id);
}
std::size_t& get_state() { return state; }
@@ -135,60 +136,36 @@
typedef void functor_type(iterpair_type, std::size_t, bool&, data&);
typedef boost::function<functor_type> functor_wrapper_type;
- struct action_key
- {
- action_key(std::size_t id, std::size_t state)
- : id_(id), state_(state) {}
- action_key(std::pair<std::size_t, std::size_t> const& k)
- : id_(k.first), state_(k.second) {}
-
- friend bool operator<(action_key const& lhs, action_key const& rhs)
- {
- return lhs.id_ < rhs.id_ ||
- (lhs.id_ == rhs.id_ && lhs.state_ < rhs.state_);
- }
-
- std::size_t id_;
- std::size_t state_;
- };
- typedef std::multimap<action_key, functor_wrapper_type>
+ typedef std::vector<std::vector<functor_wrapper_type> >
semantic_actions_type;
- typedef detail::wrap_action<functor_wrapper_type, iterpair_type, data>
- wrap_action_type;
+ typedef detail::wrap_action<functor_wrapper_type
+ , iterpair_type, data> wrap_action_type;
template <typename IterData>
data (IterData const& data_, Iterator& first_, Iterator const& last_)
: base_type(data_, first_, last_)
- , actions(data_.actions_) {}
+ , actions_(data_.actions_) {}
// invoke attached semantic actions, if defined
- bool invoke_actions(std::size_t id, std::size_t state
- , Iterator const& end)
+ bool invoke_actions(std::size_t state, std::size_t id
+ , std::size_t unique_id, Iterator const& end)
{
- if (actions.empty())
- return true; // nothing to invoke, continue with 'match'
+ if (state >= actions_.size())
+ return true; // no action defined for this state
- iterpair_type itp(this->first, end);
- bool match = true;
+ std::vector<functor_wrapper_type> const& actions = actions_[state];
- typedef typename semantic_actions_type::const_iterator
- iterator_type;
+ if (unique_id >= actions.size() || !actions[unique_id])
+ return true; // nothing to invoke, continue with 'match'
- std::pair<iterator_type, iterator_type> p =
- actions.equal_range(action_key(id, state));
-
- while (p.first != p.second)
- {
- ((*p.first).second)(itp, id, match, *this);
- if (!match)
- return false; // return a 'no-match'
- ++p.first;
- }
- return true; // normal execution
+ iterpair_type itp(this->first, end);
+ bool match = true;
+ actions[unique_id](itp, id, match, *this);
+ return match;
}
- semantic_actions_type const& actions;
+ semantic_actions_type const& actions_;
};
}
@@ -297,7 +274,8 @@
#endif
Iterator end = data.first;
- std::size_t id = data.next(end);
+ std::size_t unique_id = boost::lexer::npos;
+ std::size_t id = data.next(end, unique_id);
if (boost::lexer::npos == id) { // no match
#if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
@@ -309,47 +287,43 @@
std::cerr << "Not matched, in state: " << data.state
<< ", lookahead: >" << next << "<" << std::endl;
#endif
- result = result_type(0);
+ return result = result_type(0);
}
else if (0 == id) { // EOF reached
#if defined(BOOST_SPIRIT_STATIC_EOF)
- result = eof;
+ return result = eof;
#else
- result = mp.ftor.eof;
+ return result = mp.ftor.eof;
#endif
}
- else {
+
#if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
- {
- std::string next;
- Iterator it = end;
- for (std::size_t i = 0; i < 10 && it != data.last; ++it, ++i)
- next += *it;
-
- std::cerr << "Matched: " << id << ", in state: "
- << data.state << ", string: >"
- << std::basic_string<char_type>(data.first, end) << "<"
- << ", lookahead: >" << next << "<" << std::endl;
- }
+ {
+ std::string next;
+ Iterator it = end;
+ for (std::size_t i = 0; i < 10 && it != data.last; ++it, ++i)
+ next += *it;
+
+ std::cerr << "Matched: " << id << ", in state: "
+ << data.state << ", string: >"
+ << std::basic_string<char_type>(data.first, end) << "<"
+ << ", lookahead: >" << next << "<" << std::endl;
+ }
#endif
- // invoke_actions might change state
- std::size_t state = data.get_state();
+ // invoke_actions might change state
+ std::size_t state = data.get_state();
- // invoke attached semantic actions, if defined
- if (!data.invoke_actions(id, state, end))
- {
- // one of the semantic actions signaled no-match
- result = result_type(0);
- }
- else
- {
- // return matched token, advancing 'data.first' past the
- // matched sequence
- assign_on_exit<Iterator> on_exit(data.first, end);
- result = result_type(id, state, data.first, end);
- }
+ // invoke attached semantic actions, if defined
+ if (!data.invoke_actions(state, id, unique_id, end))
+ {
+ // one of the semantic actions signaled no-match
+ return result = result_type(0);
}
- return result;
+
+ // return matched token, advancing 'data.first' past the
+ // matched sequence
+ assign_on_exit<Iterator> on_exit(data.first, end);
+ return result = result_type(id, state, data.first, end);
}
// set_state are propagated up to the iterator interface, allowing to
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -68,6 +68,10 @@
}
os_ << "};\n\n";
+ os_ << "// this variable defines the number of lexer states\n";
+ os_ << "std::size_t const lexer_state_count = "
+ << rules_.statemap().size() << ";\n\n";
+
return true;
}
@@ -137,11 +141,12 @@
os_ << "\n ";
}
- os_ << "Iterator &start_token_, Iterator const& end_)\n";
+ os_ << "Iterator &start_token_, Iterator const& end_, ";
+ os_ << "std::size_t& unique_id_)\n";
os_ << "{\n";
- os_ << " enum {end_state_index, id_index, state_index, bol_index, "
- "eol_index,\n";
- os_ << " dead_state_index, dfa_offset};\n";
+ os_ << " enum {end_state_index, id_index, unique_id_index, "
+ "state_index, bol_index,\n";
+ os_ << " eol_index, dead_state_index, dfa_offset};\n";
os_ << " static const std::size_t npos = static_cast"
"<std::size_t>(~0);\n";
@@ -156,7 +161,7 @@
std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front();
os_ << " static const std::size_t lookup" << state_
- << "_[" << lookups_ << "] = {";
+ << "_[" << lookups_ << "] = {\n ";
for (/**/; i_ < count_; ++i_)
{
@@ -171,16 +176,16 @@
if (i_ < count_ - 1)
{
- os_ << "," << std::endl << " ";
+ os_ << ",\n ";
}
j_ = 1;
}
- os_ << "};\n";
+ os_ << " };\n";
count_ = sm_.data()._dfa[state_]->size ();
os_ << " static const std::size_t dfa" << state_ << "_[" <<
- count_ << "] = {";
+ count_ << "] = {\n ";
count_ /= 8;
for (i_ = 0; i_ < count_; ++i_)
@@ -196,7 +201,7 @@
if (i_ < count_ - 1)
{
- os_ << "," << std::endl << " ";
+ os_ << ",\n ";
}
}
@@ -218,25 +223,23 @@
}
}
- os_ << "};\n";
+ os_ << " };\n";
}
std::size_t count_ = sm_.data()._dfa_alphabet.size();
std::size_t i_ = 1;
os_ << " static const std::size_t *lookup_arr_[" << count_ <<
- "] = {";
-
- os_ << "lookup0_";
+ "] = { lookup0_";
for (i_ = 1; i_ < count_; ++i_)
{
os_ << ", " << "lookup" << i_ << "_";
}
- os_ << "};\n";
+ os_ << " };\n";
os_ << " static const std::size_t dfa_alphabet_arr_[" <<
- count_ << "] = {";
+ count_ << "] = { ";
os_ << sm_.data()._dfa_alphabet.front ();
@@ -245,9 +248,9 @@
os_ << ", " << sm_.data()._dfa_alphabet[i_];
}
- os_ << "};\n";
+ os_ << " };\n";
os_ << " static const std::size_t *dfa_arr_[" << count_ <<
- "] = {";
+ "] = { ";
os_ << "dfa0_";
for (i_ = 1; i_ < count_; ++i_)
@@ -255,7 +258,7 @@
os_ << ", " << "dfa" << i_ << "_";
}
- os_ << "};\n";
+ os_ << " };\n";
}
else
{
@@ -266,7 +269,7 @@
std::size_t count_ = lookups_ / 8;
os_ << " static const std::size_t lookup_[";
- os_ << sm_.data()._lookup[0]->size() << "] = {";
+ os_ << sm_.data()._lookup[0]->size() << "] = {\n ";
for (; i_ < count_; ++i_)
{
@@ -281,17 +284,17 @@
if (i_ < count_ - 1)
{
- os_ << "," << std::endl << " ";
+ os_ << ",\n ";
}
j_ = 1;
}
- os_ << "};\n";
+ os_ << " };\n";
os_ << " static const std::size_t dfa_alphabet_ = " <<
sm_.data()._dfa_alphabet.front () << ";\n";
os_ << " static const std::size_t dfa_[" <<
- sm_.data()._dfa[0]->size () << "] = {";
+ sm_.data()._dfa[0]->size () << "] = {\n ";
count_ = sm_.data()._dfa[0]->size () / 8;
for (i_ = 0; i_ < count_; ++i_)
@@ -307,7 +310,7 @@
if (i_ < count_ - 1)
{
- os_ << "," << std::endl << " ";
+ os_ << ",\n ";
}
}
@@ -330,7 +333,7 @@
}
}
- os_ << "};\n";
+ os_ << " };\n";
}
os_ << "\n if (start_token_ == end_) return 0;\n\n";
@@ -347,24 +350,20 @@
os_ << " Iterator curr_ = start_token_;\n";
os_ << " bool end_state_ = *ptr_ != 0;\n";
os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
- os_ << " Iterator end_token_ = start_token_;\n";
- os_ << '\n';
+ os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
+ os_ << " Iterator end_token_ = start_token_;\n\n";
+
os_ << " while (curr_ != end_)\n";
os_ << " {\n";
if (sm_.data()._seen_BOL_assertion)
{
- os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n";
+ os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n\n";
}
if (sm_.data()._seen_EOL_assertion)
{
- os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n";
- }
-
- if (sm_.data()._seen_BOL_assertion || sm_.data()._seen_EOL_assertion)
- {
- os_ << '\n';
+ os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
}
if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
@@ -396,7 +395,7 @@
os_ << " if (state_ == 0) break;\n";
os_ << '\n';
os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
- os_ << " }\n";
+ os_ << " }\n\n";
}
else if (sm_.data()._seen_BOL_assertion)
{
@@ -411,8 +410,8 @@
if (lookups_ == 256)
{
- os_ << " ptr_[lookup_[static_cast<unsigned char>\n";
- os_ << " (*curr_++)]];\n";
+ os_ << " ptr_[lookup_[static_cast<unsigned char>"
+ "(*curr_++)]];\n";
}
else
{
@@ -423,7 +422,7 @@
os_ << " if (state_ == 0) break;\n";
os_ << '\n';
os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
- os_ << " }\n";
+ os_ << " }\n\n";
}
else if (sm_.data()._seen_EOL_assertion)
{
@@ -437,8 +436,8 @@
if (lookups_ == 256)
{
- os_ << " ptr_[lookup_[static_cast<unsigned char>\n";
- os_ << " (*curr_++)]];\n";
+ os_ << " ptr_[lookup_[static_cast<unsigned char>"
+ "(*curr_++)]];\n";
}
else
{
@@ -449,7 +448,7 @@
os_ << " if (state_ == 0) break;\n";
os_ << '\n';
os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
- os_ << " }\n";
+ os_ << " }\n\n";
}
else
{
@@ -457,8 +456,8 @@
if (lookups_ == 256)
{
- os_ << " ptr_[lookup_[static_cast<unsigned char>\n";
- os_ << " (*curr_++)]];\n";
+ os_ << " ptr_[lookup_[static_cast<unsigned char>"
+ "(*curr_++)]];\n";
}
else
{
@@ -468,14 +467,14 @@
os_ << '\n';
os_ << " if (state_ == 0) break;\n";
os_ << '\n';
- os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+ os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n";
}
- os_ << '\n';
os_ << " if (*ptr_)\n";
os_ << " {\n";
os_ << " end_state_ = true;\n";
os_ << " id_ = *(ptr_ + id_index);\n";
+ os_ << " uid_ = *(ptr_ + unique_id_index);\n";
if (dfas_ > 1)
{
@@ -484,21 +483,21 @@
os_ << " end_token_ = curr_;\n";
os_ << " }\n";
- os_ << " }\n";
- os_ << '\n';
+ os_ << " }\n\n";
if (sm_.data()._seen_EOL_assertion)
{
- os_ << " const std::size_t EOL_state_ = ptr_[eol_index];\n";
- os_ << '\n';
+ os_ << " const std::size_t EOL_state_ = ptr_[eol_index];\n\n";
+
os_ << " if (EOL_state_ && curr_ == end_)\n";
os_ << " {\n";
- os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
- os_ << '\n';
+ os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n";
+
os_ << " if (*ptr_)\n";
os_ << " {\n";
os_ << " end_state_ = true;\n";
os_ << " id_ = *(ptr_ + id_index);\n";
+ os_ << " uid_ = *(ptr_ + unique_id_index);\n";
if (dfas_ > 1)
{
@@ -507,8 +506,7 @@
os_ << " end_token_ = curr_;\n";
os_ << " }\n";
- os_ << " }\n";
- os_ << '\n';
+ os_ << " }\n\n";
}
os_ << " if (end_state_)\n";
@@ -518,7 +516,6 @@
if (dfas_ > 1)
{
- os_ << '\n';
os_ << " if (id_ == 0) goto again;\n";
}
@@ -533,8 +530,10 @@
}
os_ << " id_ = npos;\n";
- os_ << " }\n";
- os_ << '\n';
+ os_ << " uid_ = npos;\n";
+ os_ << " }\n\n";
+
+ os_ << " unique_id_ = uid_;\n";
os_ << " return id_;\n";
os_ << "}\n\n";
@@ -555,7 +554,7 @@
{
if (!lexer.init_dfa())
return false;
- return detail::generate_cpp(lexer.state_machine, lexer.rules, os
+ return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os
, name_suffix, false, false);
}
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -24,82 +24,11 @@
typedef typename boost::detail::iterator_traits<Iterator>::value_type
char_type;
-// static std::size_t next (const std::size_t * const lookup_,
-// std::size_t const dfa_alphabet_, const std::size_t * const dfa_,
-// Iterator const& start_, Iterator &start_token_,
-// Iterator const& end_)
-// {
-// if (start_token_ == end_) return 0;
-//
-// const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
-// Iterator curr_ = start_token_;
-// bool end_state_ = *ptr_ != 0;
-// std::size_t id_ = *(ptr_ + lexer::id_index);
-// Iterator end_token_ = start_token_;
-//
-// while (curr_ != end_)
-// {
-// std::size_t const BOL_state_ = ptr_[lexer::bol_index];
-// std::size_t const EOL_state_ = ptr_[lexer::eol_index];
-//
-// if (BOL_state_ && (start_token_ == start_ ||
-// *(start_token_ - 1) == '\n'))
-// {
-// ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
-// }
-// else if (EOL_state_ && *curr_ == '\n')
-// {
-// ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
-// }
-// else
-// {
-// std::size_t const state_ = ptr_[lookup_[*curr_++]];
-//
-// if (state_ == 0)
-// {
-// break;
-// }
-//
-// ptr_ = &dfa_[state_ * dfa_alphabet_];
-// }
-//
-// if (*ptr_)
-// {
-// end_state_ = true;
-// id_ = *(ptr_ + lexer::id_index);
-// end_token_ = curr_;
-// }
-// }
-//
-// const std::size_t EOL_state_ = ptr_[lexer::eol_index];
-//
-// if (EOL_state_ && curr_ == end_)
-// {
-// ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
-//
-// if (*ptr_)
-// {
-// end_state_ = true;
-// id_ = *(ptr_ + lexer::id_index);
-// end_token_ = curr_;
-// }
-// }
-//
-// if (end_state_) {
-// // return longest match
-// start_token_ = end_token_;
-// }
-// else {
-// id_ = lexer::npos;
-// }
-//
-// return id_;
-// }
-
static std::size_t next (
boost::lexer::basic_state_machine<char_type> const& state_machine_
, std::size_t &dfa_state_, Iterator const& start_
- , Iterator &start_token_, Iterator const& end_)
+ , Iterator &start_token_, Iterator const& end_
+ , std::size_t& unique_id_)
{
if (start_token_ == end_) return 0;
@@ -112,6 +41,7 @@
Iterator curr_ = start_token_;
bool end_state_ = *ptr_ != 0;
std::size_t id_ = *(ptr_ + boost::lexer::id_index);
+ std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
Iterator end_token_ = start_token_;
while (curr_ != end_)
@@ -154,6 +84,7 @@
{
end_state_ = true;
id_ = *(ptr_ + boost::lexer::id_index);
+ uid_ = *(ptr_ + boost::lexer::unique_id_index);
dfa_state_ = *(ptr_ + boost::lexer::state_index);
end_token_ = curr_;
}
@@ -169,6 +100,7 @@
{
end_state_ = true;
id_ = *(ptr_ + boost::lexer::id_index);
+ uid_ = *(ptr_ + boost::lexer::unique_id_index);
dfa_state_ = *(ptr_ + boost::lexer::state_index);
end_token_ = curr_;
}
@@ -183,15 +115,18 @@
}
else {
id_ = boost::lexer::npos;
+ uid_ = boost::lexer::npos;
}
+ unique_id_ = uid_;
return id_;
}
///////////////////////////////////////////////////////////////////////
static std::size_t next (
boost::lexer::basic_state_machine<char_type> const& state_machine_
- , Iterator const& start_, Iterator &start_token_, Iterator const& end_)
+ , Iterator const& start_, Iterator &start_token_, Iterator const& end_
+ , std::size_t& unique_id_)
{
if (start_token_ == end_) return 0;
@@ -202,6 +137,7 @@
Iterator curr_ = start_token_;
bool end_state_ = *ptr_ != 0;
std::size_t id_ = *(ptr_ + boost::lexer::id_index);
+ std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
Iterator end_token_ = start_token_;
while (curr_ != end_)
@@ -244,6 +180,7 @@
{
end_state_ = true;
id_ = *(ptr_ + boost::lexer::id_index);
+ uid_ = *(ptr_ + boost::lexer::unique_id_index);
end_token_ = curr_;
}
}
@@ -258,6 +195,7 @@
{
end_state_ = true;
id_ = *(ptr_ + boost::lexer::id_index);
+ uid_ = *(ptr_ + boost::lexer::unique_id_index);
end_token_ = curr_;
}
}
@@ -268,8 +206,10 @@
}
else {
id_ = boost::lexer::npos;
+ uid_ = boost::lexer::npos;
}
+ unique_id_ = uid_;
return id_;
}
};
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -117,8 +117,7 @@
std::size_t add_state(char_type const* state)
{
- rules.add_state(state);
- return rules.state(state);
+ return rules.add_state(state);
}
string_type initial_state() const
{
@@ -194,7 +193,7 @@
// operator_bool() is needed for the safe_bool base class
operator typename safe_bool<lexer>::result_type() const
{
- return safe_bool<lexer>()(initialized_dfa);
+ return safe_bool<lexer>()(initialized_dfa_);
}
typedef typename boost::detail::iterator_traits<Iterator>::value_type
@@ -225,7 +224,7 @@
if (!init_dfa())
return iterator_type();
- iterator_data_type iterator_data = { state_machine, rules, actions };
+ iterator_data_type iterator_data = { state_machine_, rules_, actions_ };
return iterator_type(iterator_data, first, last);
}
@@ -249,10 +248,10 @@
}
// Lexer instances can be created by means of a derived class only.
- lexer(unsigned int flags_)
- : initialized_dfa(false), flags(map_flags(flags_))
+ lexer(unsigned int flags)
+ : flags_(map_flags(flags)), initialized_dfa_(false)
{
- rules.flags(flags);
+ rules_.flags(flags_);
}
public:
@@ -261,15 +260,15 @@
std::size_t token_id)
{
add_state(state);
- rules.add(state, detail::escape(tokendef), token_id, state);
- initialized_dfa = false;
+ rules_.add(state, detail::escape(tokendef), token_id, state);
+ initialized_dfa_ = false;
}
void add_token(char_type const* state, string_type const& tokendef,
std::size_t token_id)
{
add_state(state);
- rules.add(state, tokendef, token_id, state);
- initialized_dfa = false;
+ rules_.add(state, tokendef, token_id, state);
+ initialized_dfa_ = false;
}
// Allow a token_set to be associated with this lexer instance. This
@@ -278,8 +277,8 @@
void add_token(char_type const* state, token_set const& tokset)
{
add_state(state);
- rules.add(state, tokset.get_rules());
- initialized_dfa = false;
+ rules_.add(state, tokset.get_rules());
+ initialized_dfa_ = false;
}
// Allow to associate a whole lexer instance with another lexer
@@ -287,12 +286,12 @@
// lexer into this instance.
template <typename Token_, typename Iterator_, typename Functor_
, typename TokenSet_>
- void add_token(char_type const* state
+ std::size_t add_token(char_type const* state
, lexer<Token_, Iterator_, Functor_, TokenSet_> const& lexer_def)
{
add_state(state);
- rules.add(state, lexer_def.get_rules());
- initialized_dfa = false;
+ rules_.add(state, lexer_def.get_rules());
+ initialized_dfa_ = false;
}
// interface for pattern definition management
@@ -300,32 +299,31 @@
string_type const& patterndef)
{
add_state(state);
- rules.add_macro(name.c_str(), patterndef);
- initialized_dfa = false;
+ rules_.add_macro(name.c_str(), patterndef);
+ initialized_dfa_ = false;
}
- boost::lexer::rules const& get_rules() const { return rules; }
+ boost::lexer::rules const& get_rules() const { return rules_; }
void clear(char_type const* state)
{
- std::size_t s = rules.state(state);
+ std::size_t s = rules_.state(state);
if (boost::lexer::npos != s)
- rules.clear(state);
- initialized_dfa = false;
+ rules_.clear(state);
+ initialized_dfa_ = false;
}
std::size_t add_state(char_type const* state)
{
- std::size_t stateid = rules.state(state);
+ std::size_t stateid = rules_.state(state);
if (boost::lexer::npos == stateid) {
- rules.add_state(state);
- stateid = rules.state(state);
- initialized_dfa = false;
+ stateid = rules_.add_state(state);
+ initialized_dfa_ = false;
}
return stateid;
}
string_type initial_state() const
{
- return string_type(rules.initial());
+ return string_type(rules_.initial());
}
// Register a semantic action with the given id
@@ -341,32 +339,43 @@
value_type;
typedef typename Functor::wrap_action_type wrapper_type;
- actions.insert(value_type(std::make_pair(id, state)
- , wrapper_type::call(act)));
+ if (actions_.size() <= state)
+ actions_.resize(state + 1);
+
+ std::size_t unique_id = rules_.retrieve_id(state, id);
+ BOOST_ASSERT(boost::lexer::npos != unique_id);
+
+ value_type& actions (actions_[state]);
+ if (actions.size() <= unique_id)
+ actions.resize(unique_id + 1);
+
+ actions[unique_id] = wrapper_type::call(act);
}
bool init_dfa() const
{
- if (!initialized_dfa) {
- state_machine.clear();
+ if (!initialized_dfa_) {
+ state_machine_.clear();
typedef boost::lexer::basic_generator<char_type> generator;
- generator::build (rules, state_machine);
- generator::minimise (state_machine);
+ generator::build (rules_, state_machine_);
+ generator::minimise (state_machine_);
#if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
- boost::lexer::debug::dump(state_machine, std::cerr);
+ boost::lexer::debug::dump(state_machine_, std::cerr);
#endif
- initialized_dfa = true;
+ initialized_dfa_ = true;
}
return true;
}
private:
- mutable boost::lexer::basic_state_machine<char_type> state_machine;
- boost::lexer::basic_rules<char_type> rules;
- typename Functor::semantic_actions_type actions;
- mutable bool initialized_dfa;
- boost::lexer::regex_flags flags;
+ // lexertl specific data
+ mutable boost::lexer::basic_state_machine<char_type> state_machine_;
+ boost::lexer::basic_rules<char_type> rules_;
+ boost::lexer::regex_flags flags_;
+
+ typename Functor::semantic_actions_type actions_;
+ mutable bool initialized_dfa_;
template <typename Lexer>
friend bool generate_static(Lexer const&, std::ostream&, char const*);
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -49,7 +49,7 @@
char_type;
typedef std::size_t (*next_token_functor)(std::size_t&,
- Iterator const&, Iterator&, Iterator const&);
+ Iterator const&, Iterator&, Iterator const&, std::size_t&);
typedef unused_type semantic_actions_type;
@@ -62,15 +62,15 @@
: next_token(data_.next_), first(first_), last(last_)
{}
- std::size_t next(Iterator& end)
+ std::size_t next(Iterator& end, std::size_t& unique_id)
{
- typedef basic_iterator_tokeniser<Iterator> tokenizer;
std::size_t state;
- return next_token(state, first, end, last);
+ return next_token(state, first, end, last, unique_id);
}
// nothing to invoke, so this is empty
- bool invoke_actions(std::size_t, Iterator const&)
+ bool invoke_actions(std::size_t, std::size_t, std::size_t
+ , Iterator const&)
{
return true; // always accept
}
@@ -93,9 +93,8 @@
typedef typename base_type::state_type state_type;
typedef typename base_type::char_type char_type;
- typedef
- typename base_type::semantic_actions_type
- semantic_actions_type;
+ typedef typename base_type::semantic_actions_type
+ semantic_actions_type;
// initialize the shared data
template <typename IterData>
@@ -103,17 +102,24 @@
: base_type(data_, first_, last_), state(0)
{}
- std::size_t next(Iterator& end)
+ std::size_t next(Iterator& end, std::size_t& unique_id)
{
- typedef basic_iterator_tokeniser<Iterator> tokenizer;
- return this->next_token(state, this->first, end, this->last);
+ return this->next_token(state, this->first, end, this->last
+ , unique_id);
}
std::size_t& get_state() { return state; }
void set_state_name (char_type const* new_state)
{
- std::size_t state_id = this->rules.state(new_state);
+ this->rules.state(new_state);
+ for (std::size_t state_id = 0;
+ state_id < sizeof(lexer_state_names)/sizeof(lexer_state_names[0]); ++state_id)
+
+ // if the following assertion fires you've probably been using
+ // a lexer state name which was not defined in your token
+ // definition
BOOST_ASSERT(state_id != boost::lexer::npos);
+
if (state_id != boost::lexer::npos)
state = state_id;
}
@@ -135,7 +141,7 @@
typedef void functor_type(iterpair_type, std::size_t, bool&, static_data&);
typedef boost::function<functor_type> functor_wrapper_type;
- typedef std::multimap<std::size_t, functor_wrapper_type>
+ typedef std::vector<std::vector<functor_wrapper_type> >
semantic_actions_type;
typedef detail::wrap_action<functor_wrapper_type
@@ -143,34 +149,31 @@
template <typename IterData>
static_data (IterData const& data_, Iterator& first_, Iterator const& last_)
- : base_type(data_, first_, last_),
- actions(data_.actions_)
- {}
+ : base_type(data_, first_, last_)
+ , actions(data_.actions_), state_names_(data_.state_names_)
+ , state_count_(data_.state_count_) {}
// invoke attached semantic actions, if defined
- bool invoke_actions(std::size_t id, Iterator const& end)
+ bool invoke_actions(std::size_t state, std::size_t id
+ , std::size_t unique_id, Iterator const& end)
{
- if (actions.empty())
- return true; // nothing to invoke, continue with 'match'
+ if (state >= actions_.size())
+ return true; // no action defined for this state
- iterpair_type itp(this->first, end);
- bool match = true;
+ std::vector<functor_wrapper_type> const& actions = actions_[state];
- typedef typename semantic_actions_type::const_iterator
- iterator_type;
+ if (unique_id >= actions.size() || !actions[unique_id])
+ return true; // nothing to invoke, continue with 'match'
- std::pair<iterator_type, iterator_type> p = actions.equal_range(id);
- while (p.first != p.second)
- {
- ((*p.first).second)(itp, id, match, *this);
- if (!match)
- return false; // return a 'no-match'
- ++p.first;
- }
- return true; // normal execution
+ iterpair_type itp(this->first, end);
+ bool match = true;
+ actions[unique_id](itp, id, match, *this);
+ return match;
}
- semantic_actions_type const& actions;
+ semantic_actions_type const& actions_;
+            const char* const* state_names_;
+            std::size_t const state_count_;
};
}
@@ -279,8 +282,9 @@
#endif
Iterator end = data.first;
- std::size_t id = data.next(end);
-
+ std::size_t unique_id = boost::lexer::npos;
+ std::size_t id = data.next(end, unique_id);
+
if (boost::lexer::npos == id) { // no match
#if defined(BOOST_SPIRIT_DEBUG)
std::string next;
@@ -318,7 +322,7 @@
std::size_t state = data.get_state();
// invoke attached semantic actions, if there are any defined
- if (!data.invoke_actions(id, end))
+ if (!data.invoke_actions(state, id, unique_id, end))
{
// one of the semantic actions signaled no-match
return result = result_type(0);
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -30,6 +30,7 @@
// This is a forward declaration for the generated static table of
// valid state names
extern char const* const lexer_state_names[];
+ extern std::size_t const lexer_state_count;
// This is the forward declaration of the generated function to be
// called to get the next token.
@@ -158,6 +159,8 @@
{
typename Functor::next_token_functor next_;
typename Functor::semantic_actions_type const& actions_;
+ std::size_t const state_count_;
+ const char* const* state_names_;
};
public:
@@ -167,7 +170,8 @@
template <typename F>
iterator_type begin(Iterator& first, Iterator const& last, F next) const
{
- iterator_data_type iterator_data = { next, actions };
+ iterator_data_type iterator_data = { next, actions
+ , static_::lexer_state_count, static_::lexer_state_names };
return iterator_type(iterator_data, first, last);
}
@@ -178,7 +182,8 @@
iterator_type begin(Iterator_& first, Iterator_ const& last) const
{
iterator_data_type iterator_data =
- { &lex::lexertl::static_::next_token<Iterator_>, actions };
+ { &lex::lexertl::static_::next_token<Iterator_>, actions,
+ static_::lexer_state_count, static_::lexer_state_names };
return iterator_type(iterator_data, first, last);
}
Modified: trunk/boost/spirit/home/lex/lexer/sequence.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/sequence.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/sequence.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -40,9 +40,16 @@
: elements(elements) {}
template <typename LexerDef, typename String>
- void collect(LexerDef& lexdef, String const& state)
+ void collect(LexerDef& lexdef, String const& state) const
{
- detail::sequence_function<LexerDef, String> f (lexdef, state);
+ detail::sequence_collect_function<LexerDef, String> f (lexdef, state);
+ fusion::any(elements, f);
+ }
+
+ template <typename LexerDef>
+ void add_actions(LexerDef& lexdef) const
+ {
+ detail::sequence_add_actions_function<LexerDef> f (lexdef);
fusion::any(elements, f);
}
Modified: trunk/boost/spirit/home/lex/lexer/string_token_def.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/string_token_def.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/string_token_def.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -69,6 +69,9 @@
lexdef.add_token (state.c_str(), str_, id_);
}
+ template <typename LexerDef>
+ void add_actions(LexerDef& lexdef) const {}
+
std::size_t id() const { return id_; }
string_type str_;
Modified: trunk/boost/spirit/home/lex/lexer/token_def.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/token_def.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/token_def.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -168,6 +168,9 @@
lexdef.add_token(state.c_str(), get<char_type>(def), token_id);
}
+ template <typename LexerDef>
+ void add_actions(LexerDef& lexdef) const {}
+
public:
typedef Char char_type;
typedef Idtype id_type;
@@ -180,8 +183,7 @@
explicit token_def(char_type def_, Idtype id_ = Idtype())
: proto_base_type(terminal_type::make(alias()))
- , def(def_)
- , token_id(Idtype() == id_ ? def_ : id_)
+ , def(def_), token_id(Idtype() == id_ ? def_ : id_)
, token_state(~0) {}
explicit token_def(string_type const& def_, Idtype id_ = Idtype())
@@ -207,6 +209,7 @@
// general accessors
Idtype id() const { return token_id; }
void id(Idtype id) { token_id = id; }
+
string_type definition() const
{
return (0 == def.which())
Modified: trunk/boost/spirit/home/lex/lexer/token_set.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/token_set.hpp (original)
+++ trunk/boost/spirit/home/lex/lexer/token_set.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -91,10 +91,10 @@
// If the following assertion fires you probably forgot to
// associate this token set definition with a lexer instance.
- BOOST_ASSERT(~0 != token_state);
+ BOOST_ASSERT(~0 != token_state_);
token_type &t = *first;
- if (token_is_valid(t) && token_state == t.state()) {
+ if (token_is_valid(t) && token_state_ == t.state()) {
// any of the token definitions matched
qi::detail::assign_to(t, attr);
++first;
@@ -123,12 +123,15 @@
// is not possible. Please create a separate token_set instance
// from the same set of regular expressions for each lexer state it
// needs to be associated with.
- BOOST_ASSERT(~0 == token_state || state_id == token_state);
+ BOOST_ASSERT(~0 == token_state_ || state_id == token_state_);
- token_state = state_id;
+ token_state_ = state_id;
lexdef.add_token (state.c_str(), *this);
}
+ template <typename LexerDef>
+ void add_actions(LexerDef& lexdef) const {}
+
private:
// allow to use the tokset.add("regex1", id1)("regex2", id2);
// syntax
@@ -176,15 +179,15 @@
tokdef.id(token_id);
}
- def.add_token (def.initial_state().c_str(), tokdef.definition(),
- token_id);
+ def.add_token(def.initial_state().c_str(), tokdef.definition()
+ , token_id);
return *this;
}
template <typename TokenSet_>
adder const& operator()(token_set<TokenSet_> const& tokset) const
{
- def.add_token (def.initial_state().c_str(), tokset);
+ def.add_token(def.initial_state().c_str(), tokset);
return *this;
}
@@ -209,18 +212,27 @@
};
friend struct pattern_adder;
+ private:
+ // Helper function to invoke the necessary 2 step compilation process
+ // on token definition expressions
+ template <typename TokenExpr>
+ void compile2pass(TokenExpr const& expr)
+ {
+ expr.collect(*this, base_token_set::initial_state());
+ expr.add_actions(*this);
+ }
+
public:
///////////////////////////////////////////////////////////////////
template <typename Expr>
void define(Expr const& expr)
{
- compile<lex::domain>(expr).collect(
- *this, base_token_set::initial_state());
+ compile2pass(compile<lex::domain>(expr));
}
token_set()
: proto_base_type(terminal_type::make(alias()))
- , add(this_()), add_pattern(this_()), token_state(~0) {}
+ , add(this_()), add_pattern(this_()), token_state_(~0) {}
// allow to assign a token definition expression
template <typename Expr>
@@ -240,10 +252,10 @@
adder add;
pattern_adder add_pattern;
- std::size_t state() const { return token_state; }
+ std::size_t state() const { return token_state_; }
private:
- mutable std::size_t token_state;
+ mutable std::size_t token_state_;
};
// allow to assign a token definition expression
Modified: trunk/boost/spirit/home/lex/reference.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/reference.hpp (original)
+++ trunk/boost/spirit/home/lex/reference.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -37,7 +37,13 @@
template <typename LexerDef, typename String>
void collect(LexerDef& lexdef, String const& state) const
{
- return this->ref.get().collect(lexdef, state);
+ this->ref.get().collect(lexdef, state);
+ }
+
+ template <typename LexerDef>
+ void add_actions(LexerDef& lexdef) const
+ {
+ this->ref.get().add_actions(lexdef);
}
};
Modified: trunk/boost/spirit/home/support/detail/lexer/consts.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/consts.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/consts.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -15,10 +15,10 @@
namespace lexer
{
enum regex_flags {none = 0, icase = 1, dot_not_newline = 2};
- // 0 = end state, 1 = id, 2 = lex state, 3 = bol, 4 = eol,
- // 5 = dead_state_index
- enum {end_state_index, id_index, state_index, bol_index, eol_index,
- dead_state_index, dfa_offset};
+ // 0 = end state, 1 = id, 2 = unique_id, 3 = lex state, 4 = bol, 5 = eol,
+ // 6 = dead_state_index
+ enum {end_state_index, id_index, unique_id_index, state_index, bol_index,
+ eol_index, dead_state_index, dfa_offset};
const std::size_t max_macro_len = 30;
const std::size_t num_chars = 256;
Modified: trunk/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -29,6 +29,7 @@
bool _end_state;
std::size_t _id;
+ std::size_t _unique_id;
std::size_t _state;
std::size_t _bol_index;
std::size_t _eol_index;
@@ -37,6 +38,7 @@
state () :
_end_state (false),
_id (0),
+ _unique_id (npos),
_state (0),
_bol_index (npos),
_eol_index (npos)
Modified: trunk/boost/spirit/home/support/detail/lexer/debug.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/debug.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/debug.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -165,6 +165,8 @@
{
end_state (stream_);
stream_ << iter_->id;
+ unique_id (stream_);
+ stream_ << iter_->unique_id;
dfa (stream_);
stream_ << iter_->goto_dfa;
stream_ << std::endl;
@@ -281,6 +283,16 @@
stream_ << L" END STATE, Id = ";
}
+ static void unique_id (std::ostream &stream_)
+ {
+ stream_ << ", Unique Id = ";
+ }
+
+ static void unique_id (std::wostream &stream_)
+ {
+ stream_ << L", Unique Id = ";
+ }
+
static void any (std::ostream &stream_)
{
stream_ << " . -> ";
Modified: trunk/boost/spirit/home/support/detail/lexer/file_input.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/file_input.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/file_input.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -33,6 +33,7 @@
struct data
{
std::size_t id;
+ std::size_t unique_id;
const CharT *start;
const CharT *end;
std::size_t state;
@@ -40,14 +41,16 @@
// Construct in end() state.
data () :
id (0),
+ unique_id (npos),
state (npos)
{
}
bool operator == (const data &rhs_) const
{
- return id == rhs_.id && start == rhs_.start &&
- end == rhs_.end && state == rhs_.state;
+ return id == rhs_.id && unique_id == rhs_.unique_id &&
+ start == rhs_.start && end == rhs_.end &&
+ state == rhs_.state;
}
};
@@ -105,12 +108,12 @@
_data.id = _input->next (&internals_._lookup->front ()->
front (), internals_._dfa_alphabet.front (),
&internals_._dfa->front ()->front (), _data.start,
- _data.end);
+ _data.end, _data.unique_id);
}
else
{
_data.id = _input->next (internals_, _data.state, _data.start,
- _data.end);
+ _data.end, _data.unique_id);
}
if (_data.id == 0)
@@ -156,6 +159,7 @@
iterator iter_;
iter_._input = this;
+ // Over-ride default of 0 (EOF)
iter_._data.id = npos;
iter_._data.start = 0;
iter_._data.end = 0;
@@ -200,7 +204,8 @@
CharT *_end_buffer;
std::size_t next (const detail::internals &internals_,
- std::size_t &start_state_, const CharT * &start_, const CharT * &end_)
+ std::size_t &start_state_, const CharT * &start_, const CharT * &end_,
+ std::size_t &unique_id_)
{
_start_token = _end_token;
@@ -213,6 +218,7 @@
const CharT *curr_ = _start_token;
bool end_state_ = *ptr_ != 0;
std::size_t id_ = *(ptr_ + id_index);
+ std::size_t uid_ = *(ptr_ + unique_id_index);
const CharT *end_token_ = curr_;
for (;;)
@@ -255,6 +261,7 @@
{
end_state_ = true;
id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
start_state_ = *(ptr_ + state_index);
end_token_ = curr_;
}
@@ -263,6 +270,7 @@
if (_start_token >= _end_buffer)
{
// No more tokens...
+ unique_id_ = npos;
return 0;
}
@@ -276,6 +284,7 @@
{
end_state_ = true;
id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
start_state_ = *(ptr_ + state_index);
end_token_ = curr_;
}
@@ -293,23 +302,26 @@
// No match causes char to be skipped
_end_token = _start_token + 1;
id_ = npos;
+ uid_ = npos;
}
start_ = _start_token;
end_ = _end_token;
+ unique_id_ = uid_;
return id_;
}
std::size_t next (const std::size_t * const lookup_,
const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
- const CharT * &start_, const CharT * &end_)
+ const CharT * &start_, const CharT * &end_, std::size_t &unique_id_)
{
_start_token = _end_token;
const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
const CharT *curr_ = _start_token;
bool end_state_ = *ptr_ != 0;
- std::size_t id_ = id_ = *(ptr_ + id_index);
+ std::size_t id_ = *(ptr_ + id_index);
+ std::size_t uid_ = *(ptr_ + unique_id_index);
const CharT *end_token_ = curr_;
for (;;)
@@ -352,6 +364,7 @@
{
end_state_ = true;
id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
end_token_ = curr_;
}
}
@@ -359,6 +372,7 @@
if (_start_token >= _end_buffer)
{
// No more tokens...
+ unique_id_ = npos;
return 0;
}
@@ -372,6 +386,7 @@
{
end_state_ = true;
id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
end_token_ = curr_;
}
}
@@ -386,10 +401,12 @@
// No match causes char to be skipped
_end_token = _start_token + 1;
id_ = npos;
+ uid_ = npos;
}
start_ = _start_token;
end_ = _end_token;
+ unique_id_ = uid_;
return id_;
}
Modified: trunk/boost/spirit/home/support/detail/lexer/generator.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/generator.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/generator.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -148,6 +148,7 @@
const typename rules::string_deque_deque ®exes_ =
rules_.regexes ();
const typename rules::id_vector_deque &ids_ = rules_.ids ();
+ std::size_t unique_id_ = 0;
const typename rules::id_vector_deque &states_ = rules_.states ();
typename rules::string_deque::const_iterator regex_iter_ =
regexes_[state_].begin ();
@@ -170,9 +171,9 @@
seen_BOL_assertion_, seen_EOL_assertion_);
detail::node *root_ = parser::parse (regex_.c_str (),
- regex_.c_str () + regex_.size (), *ids_iter_, *states_iter_,
- rules_.flags (), rules_.locale (), node_ptr_vector_, macromap_,
- token_map_, seen_BOL_assertion_, seen_EOL_assertion_);
+ regex_.c_str () + regex_.size (), *ids_iter_, unique_id_++,
+ *states_iter_, rules_.flags (), rules_.locale (), node_ptr_vector_,
+ macromap_, token_map_, seen_BOL_assertion_, seen_EOL_assertion_);
++regex_iter_;
++ids_iter_;
@@ -186,7 +187,7 @@
const typename rules::string ®ex_ = *regex_iter_;
root_ = parser::parse (regex_.c_str (),
- regex_.c_str () + regex_.size (), *ids_iter_,
+ regex_.c_str () + regex_.size (), *ids_iter_, unique_id_++,
*states_iter_, rules_.flags (), rules_.locale (),
node_ptr_vector_, macromap_, token_map_,
seen_BOL_assertion_, seen_EOL_assertion_);
@@ -277,7 +278,7 @@
const typename rules::string &name_ = iter_->first;
const typename rules::string ®ex_ = iter_->second;
detail::node *node_ = parser::parse (regex_.c_str (),
- regex_.c_str () + regex_.size (), 0, 0, flags_,
+ regex_.c_str () + regex_.size (), 0, 0, 0, flags_,
locale_, node_ptr_vector_, macromap_, token_map_,
seen_BOL_assertion_, seen_EOL_assertion_);
macro_iter_pair map_iter_ = macromap_.
@@ -362,6 +363,7 @@
{
bool end_state_ = false;
std::size_t id_ = 0;
+ std::size_t unique_id_ = npos;
std::size_t state_ = 0;
std::size_t hash_ = 0;
@@ -375,8 +377,8 @@
followpos_->begin (), end_ = followpos_->end ();
iter_ != end_; ++iter_)
{
- closure_ex (*iter_, end_state_, id_, state_, set_ptr_.get (),
- vector_ptr_.get (), hash_);
+ closure_ex (*iter_, end_state_, id_, unique_id_, state_,
+ set_ptr_.get (), vector_ptr_.get (), hash_);
}
bool found_ = false;
@@ -413,6 +415,7 @@
{
dfa_[old_size_] |= end_state;
dfa_[old_size_ + id_index] = id_;
+ dfa_[old_size_ + unique_id_index] = unique_id_;
dfa_[old_size_ + state_index] = state_;
}
}
@@ -421,8 +424,8 @@
}
static void closure_ex (detail::node *node_, bool &end_state_,
- std::size_t &id_, std::size_t &state_, node_set *set_ptr_,
- node_vector *vector_ptr_, std::size_t &hash_)
+ std::size_t &id_, std::size_t &unique_id_, std::size_t &state_,
+ node_set *set_ptr_, node_vector *vector_ptr_, std::size_t &hash_)
{
const bool temp_end_state_ = node_->end_state ();
@@ -432,6 +435,7 @@
{
end_state_ = true;
id_ = node_->id ();
+ unique_id_ = node_->unique_id ();
state_ = node_->lexer_state ();
}
}
@@ -502,7 +506,7 @@
}
else
{
- iter_ = lhs_->insert (++iter_, 0);
+ iter_ = lhs_->insert (++iter_, (charset*)0);
*iter_ = overlap_.release ();
// VC++ 6 Hack:
@@ -644,7 +648,7 @@
}
else
{
- iter_ = lhs_->insert (++iter_, 0);
+ iter_ = lhs_->insert (++iter_, (equivset*)0);
*iter_ = overlap_.release ();
// VC++ 6 Hack:
@@ -816,6 +820,7 @@
new_ptr_[end_state_index] = ptr_[end_state_index];
new_ptr_[id_index] = ptr_[id_index];
+ new_ptr_[unique_id_index] = ptr_[unique_id_index];
new_ptr_[state_index] = ptr_[state_index];
new_ptr_[bol_index] = lookup_ptr_[ptr_[bol_index]];
new_ptr_[eol_index] = lookup_ptr_[ptr_[eol_index]];
Modified: trunk/boost/spirit/home/support/detail/lexer/input.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/input.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/input.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -32,6 +32,7 @@
struct data
{
std::size_t id;
+ std::size_t unique_id;
FwdIter start;
FwdIter end;
bool bol;
@@ -40,6 +41,7 @@
// Construct in end() state.
data () :
id (0),
+ unique_id (npos),
bol (false),
state (npos)
{
@@ -47,8 +49,9 @@
bool operator == (const data &rhs_) const
{
- return id == rhs_.id && start == rhs_.start &&
- end == rhs_.end && bol == rhs_.bol && state == rhs_.state;
+ return id == rhs_.id && unique_id == rhs_.unique_id &&
+ start == rhs_.start && end == rhs_.end &&
+ bol == rhs_.bol && state == rhs_.state;
}
};
@@ -115,13 +118,14 @@
(&internals_._lookup->front ()->front (),
internals_._dfa_alphabet.front (),
&internals_._dfa->front ()->front (),
- _data.bol, _data.end, _input->_end);
+ _data.bol, _data.end, _input->_end, _data.unique_id);
}
else
{
_data.id = next (&internals_._lookup->front ()->front (),
internals_._dfa_alphabet.front (), &internals_.
- _dfa->front ()->front (), _data.end, _input->_end);
+ _dfa->front ()->front (), _data.end, _input->_end,
+ _data.unique_id);
}
}
else
@@ -130,12 +134,12 @@
internals_._seen_EOL_assertion)
{
_data.id = next (internals_, _data.state,
- _data.bol, _data.end, _input->_end);
+ _data.bol, _data.end, _input->_end, _data.unique_id);
}
else
{
_data.id = next (internals_, _data.state,
- _data.end, _input->_end);
+ _data.end, _input->_end, _data.unique_id);
}
}
@@ -148,9 +152,14 @@
std::size_t next (const detail::internals &internals_,
std::size_t &start_state_, bool bol_,
- FwdIter &start_token_, const FwdIter &end_)
+ FwdIter &start_token_, const FwdIter &end_,
+ std::size_t &unique_id_)
{
- if (start_token_ == end_) return 0;
+ if (start_token_ == end_)
+ {
+ unique_id_ = npos;
+ return 0;
+ }
again:
const std::size_t * lookup_ = &internals_._lookup[start_state_]->
@@ -161,6 +170,7 @@
FwdIter curr_ = start_token_;
bool end_state_ = *ptr_ != 0;
std::size_t id_ = *(ptr_ + id_index);
+ std::size_t uid_ = *(ptr_ + unique_id_index);
bool end_bol_ = bol_;
FwdIter end_token_ = start_token_;
@@ -199,6 +209,7 @@
{
end_state_ = true;
id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
start_state_ = *(ptr_ + state_index);
end_bol_ = bol_;
end_token_ = curr_;
@@ -215,6 +226,7 @@
{
end_state_ = true;
id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
start_state_ = *(ptr_ + state_index);
end_bol_ = bol_;
end_token_ = curr_;
@@ -239,16 +251,22 @@
_data.bol = *start_token_ == '\n';
++start_token_;
id_ = npos;
+ uid_ = npos;
}
+ unique_id_ = uid_;
return id_;
}
std::size_t next (const detail::internals &internals_,
std::size_t &start_state_, FwdIter &start_token_,
- FwdIter const &end_)
+ FwdIter const &end_, std::size_t &unique_id_)
{
- if (start_token_ == end_) return 0;
+ if (start_token_ == end_)
+ {
+ unique_id_ = npos;
+ return 0;
+ }
again:
const std::size_t * lookup_ = &internals_._lookup[start_state_]->
@@ -259,6 +277,7 @@
FwdIter curr_ = start_token_;
bool end_state_ = *ptr_ != 0;
std::size_t id_ = *(ptr_ + id_index);
+ std::size_t uid_ = *(ptr_ + unique_id_index);
FwdIter end_token_ = start_token_;
while (curr_ != end_)
@@ -277,6 +296,7 @@
{
end_state_ = true;
id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
start_state_ = *(ptr_ + state_index);
end_token_ = curr_;
}
@@ -294,21 +314,29 @@
// No match causes char to be skipped
++start_token_;
id_ = npos;
+ uid_ = npos;
}
+ unique_id_ = uid_;
return id_;
}
std::size_t next (const std::size_t * const lookup_,
const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
- bool bol_, FwdIter &start_token_, FwdIter const &end_)
+ bool bol_, FwdIter &start_token_, FwdIter const &end_,
+ std::size_t &unique_id_)
{
- if (start_token_ == end_) return 0;
+ if (start_token_ == end_)
+ {
+ unique_id_ = npos;
+ return 0;
+ }
const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
FwdIter curr_ = start_token_;
bool end_state_ = *ptr_ != 0;
std::size_t id_ = *(ptr_ + id_index);
+ std::size_t uid_ = *(ptr_ + unique_id_index);
bool end_bol_ = bol_;
FwdIter end_token_ = start_token_;
@@ -347,6 +375,7 @@
{
end_state_ = true;
id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
end_bol_ = bol_;
end_token_ = curr_;
}
@@ -362,6 +391,7 @@
{
end_state_ = true;
id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
end_bol_ = bol_;
end_token_ = curr_;
}
@@ -379,21 +409,29 @@
_data.bol = *start_token_ == '\n';
++start_token_;
id_ = npos;
+ uid_ = npos;
}
+ unique_id_ = uid_;
return id_;
}
std::size_t next (const std::size_t * const lookup_,
const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
- FwdIter &start_token_, FwdIter const &end_)
+ FwdIter &start_token_, FwdIter const &end_,
+ std::size_t &unique_id_)
{
- if (start_token_ == end_) return 0;
+ if (start_token_ == end_)
+ {
+ unique_id_ = npos;
+ return 0;
+ }
const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
FwdIter curr_ = start_token_;
bool end_state_ = *ptr_ != 0;
std::size_t id_ = *(ptr_ + id_index);
+ std::size_t uid_ = *(ptr_ + unique_id_index);
FwdIter end_token_ = start_token_;
while (curr_ != end_)
@@ -412,6 +450,7 @@
{
end_state_ = true;
id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
end_token_ = curr_;
}
}
@@ -426,8 +465,10 @@
// No match causes char to be skipped
++start_token_;
id_ = npos;
+ uid_ = npos;
}
+ unique_id_ = uid_;
return id_;
}
};
@@ -452,6 +493,7 @@
iterator iter_;
iter_._input = this;
+ // Over-ride default of 0 (EOI)
iter_._data.id = npos;
iter_._data.start = _begin;
iter_._data.end = _begin;
Modified: trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -51,10 +51,10 @@
<DUPLICATE> -> '?' | '*' | '+' | '{n[,[m]]}'
*/
static node *parse (const CharT *start_, const CharT * const end_,
- const std::size_t id_, const std::size_t dfa_state_,
- const regex_flags flags_, const std::locale &locale_,
- node_ptr_vector &node_ptr_vector_, const macro_map ¯omap_,
- typename tokeniser::token_map &map_,
+ const std::size_t id_, const std::size_t unique_id_,
+ const std::size_t dfa_state_, const regex_flags flags_,
+ const std::locale &locale_, node_ptr_vector &node_ptr_vector_,
+ const macro_map ¯omap_, typename tokeniser::token_map &map_,
bool &seen_BOL_assertion_, bool &seen_EOL_assertion_)
{
node *root_ = 0;
@@ -116,7 +116,7 @@
{
node_ptr_vector_->push_back (0);
- node *rhs_node_ = new end_node (id_, dfa_state_);
+ node *rhs_node_ = new end_node (id_, unique_id_, dfa_state_);
node_ptr_vector_->back () = rhs_node_;
node_ptr_vector_->push_back (0);
Modified: trunk/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -18,9 +18,11 @@
class end_node : public node
{
public:
- end_node (const std::size_t id_, const std::size_t lexer_state_) :
+ end_node (const std::size_t id_, const std::size_t unique_id_,
+ const std::size_t lexer_state_) :
node (false),
_id (id_),
+ _unique_id (unique_id_),
_lexer_state (lexer_state_)
{
node::_firstpos.push_back (this);
@@ -58,6 +60,11 @@
return _id;
}
+ virtual std::size_t unique_id () const
+ {
+ return _unique_id;
+ }
+
virtual std::size_t lexer_state () const
{
return _lexer_state;
@@ -65,6 +72,7 @@
private:
std::size_t _id;
+ std::size_t _unique_id;
std::size_t _lexer_state;
node_vector _followpos;
Modified: trunk/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -132,6 +132,11 @@
throw runtime_error ("Internal error node::id()");
}
+ virtual std::size_t unique_id () const
+ {
+ throw runtime_error ("Internal error node::unique_id()");
+ }
+
virtual std::size_t lexer_state () const
{
throw runtime_error ("Internal error node::state()");
Modified: trunk/boost/spirit/home/support/detail/lexer/rules.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/rules.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/rules.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -44,6 +44,27 @@
return L"INITIAL";
}
};
+
+ template <typename CharT>
+ struct dot;
+
+ template<>
+ struct dot<char>
+ {
+ static const char *str ()
+ {
+ return ".";
+ }
+ };
+
+ template<>
+ struct dot<wchar_t>
+ {
+ static const wchar_t *str()
+ {
+ return L".";
+ }
+ };
}
template<typename CharT>
@@ -60,6 +81,8 @@
typedef std::deque<string_pair> string_pair_deque;
typedef std::map<string, std::size_t> string_size_t_map;
typedef std::pair<string, std::size_t> string_size_t_pair;
+ typedef std::pair<std::size_t, std::size_t> unique_id_key;
+ typedef std::map<unique_id_key, std::size_t> unique_id_map;
basic_rules (const regex_flags flags_ = dot_not_newline) :
_flags (flags_)
@@ -141,7 +164,7 @@
}
}
- void add_state (const CharT *name_)
+ std::size_t add_state (const CharT *name_)
{
validate (name_);
@@ -157,6 +180,9 @@
_lexer_state_names.push_back (name_);
}
}
+
+ // Initial is not stored, so no need to - 1.
+ return _lexer_state_names.size();
}
void add_macro (const CharT *name_, const CharT *regex_)
@@ -198,62 +224,68 @@
}
}
- void add (const CharT *regex_, const std::size_t id_)
+ std::size_t add (const CharT *regex_, const std::size_t id_)
{
- add (string (regex_), id_);
+ return add (string (regex_), id_);
}
- void add (const CharT *regex_start_, const CharT *regex_end_,
+ std::size_t add (const CharT *regex_start_, const CharT *regex_end_,
const std::size_t id_)
{
- add (string (regex_start_, regex_end_), id_);
+ return add (string (regex_start_, regex_end_), id_);
}
- void add (const string ®ex_, const std::size_t id_)
+ std::size_t add (const string ®ex_, const std::size_t id_)
{
check_for_invalid_id (id_);
_regexes[0].push_back (regex_);
_ids[0].push_back (id_);
_states[0].push_back (0);
+ return _regexes[0].size () - 1;
}
void add (const CharT *curr_state_, const CharT *regex_,
- const CharT *new_state_)
+ const CharT *new_state_, id_vector *id_vec_ = 0)
{
- add (curr_state_, string (regex_), new_state_);
+ add (curr_state_, string (regex_), new_state_, id_vec_);
}
void add (const CharT *curr_state_, const CharT *regex_start_,
- const CharT *regex_end_, const CharT *new_state_)
+ const CharT *regex_end_, const CharT *new_state_,
+ id_vector *id_vec_ = 0)
{
- add (curr_state_, string (regex_start_, regex_end_), new_state_);
+ add (curr_state_, string (regex_start_, regex_end_),
+ new_state_, id_vec_);
}
void add (const CharT *curr_state_, const string ®ex_,
- const CharT *new_state_)
+ const CharT *new_state_, id_vector *id_vec_ = 0)
{
- add (curr_state_, regex_, 0, new_state_, false);
+ add (curr_state_, regex_, 0, new_state_, false, id_vec_);
}
void add (const CharT *curr_state_, const CharT *regex_,
- const std::size_t id_, const CharT *new_state_)
+ const std::size_t id_, const CharT *new_state_, id_vector *id_vec_ = 0)
{
- add (curr_state_, string (regex_), id_, new_state_);
+ add (curr_state_, string (regex_), id_, new_state_, id_vec_);
}
void add (const CharT *curr_state_, const CharT *regex_start_,
- const CharT *regex_end_, const std::size_t id_, const CharT *new_state_)
+ const CharT *regex_end_, const std::size_t id_,
+ const CharT *new_state_, id_vector *id_vec_ = 0)
{
- add (curr_state_, string (regex_start_, regex_end_), id_, new_state_);
+ add (curr_state_, string (regex_start_, regex_end_), id_,
+ new_state_, id_vec_);
}
void add (const CharT *curr_state_, const string &regex_,
- const std::size_t id_, const CharT *new_state_)
+ const std::size_t id_, const CharT *new_state_, id_vector *id_vec_ = 0)
{
- add (curr_state_, regex_, id_, new_state_, true);
+ add (curr_state_, regex_, id_, new_state_, true, id_vec_);
}
- void add (const CharT *curr_state_, const basic_rules &rules_)
+ void add (const CharT *curr_state_, const basic_rules &rules_,
+ id_vector *id_vec_ = 0)
{
const string_deque_deque &regexes_ = rules_.regexes ();
const id_vector_deque &ids_ = rules_.ids ();
@@ -266,6 +298,7 @@
typename string_deque::const_iterator regex_iter_;
typename string_deque::const_iterator regex_end_;
typename id_vector::const_iterator id_iter_;
+ id_vector *temp_id_vec_ = id_vec_;
for (; state_regex_iter_ != state_regex_end_; ++state_regex_iter_)
{
@@ -275,7 +308,16 @@
for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_)
{
- add (curr_state_, *regex_iter_, *id_iter_, curr_state_);
+ add (curr_state_, *regex_iter_, *id_iter_, detail::dot<CharT>::str(),
+ temp_id_vec_);
+
+ if (temp_id_vec_)
+ {
+ // As suggested by Hartmut, only fill the id_vec_ once.
+ // The dfa sizes can be examined at the end to get a range
+ // of ids.
+ temp_id_vec_ = 0;
+ }
}
}
}
@@ -328,6 +370,19 @@
return detail::initial<CharT>::str ();
}
+ std::size_t retrieve_id (std::size_t state, std::size_t id) const
+ {
+ unique_id_key key (state, id);
+ typename unique_id_map::const_iterator it = _unique_ids.find (key);
+
+ if (it == _unique_ids.end ())
+ {
+ return npos;
+ }
+
+ return (*it).second;
+ }
+
private:
string_size_t_map _statemap;
string_pair_deque _macrodeque;
@@ -338,13 +393,20 @@
regex_flags _flags;
std::locale _locale;
string_deque _lexer_state_names;
+ unique_id_map _unique_ids;
void add (const CharT *curr_state_, const string &regex_,
- const std::size_t id_, const CharT *new_state_, const bool check_)
+ const std::size_t id_, const CharT *new_state_, const bool check_,
+ id_vector *id_vec_ = 0)
{
const bool star_ = *curr_state_ == '*' && *(curr_state_ + 1) == 0;
const bool dot_ = *new_state_ == '.' && *(new_state_ + 1) == 0;
+ if (id_vec_)
+ {
+ id_vec_->clear();
+ }
+
if (check_)
{
check_for_invalid_id (id_);
@@ -443,6 +505,13 @@
_regexes[curr_].push_back (regex_);
_ids[curr_].push_back (id_);
_states[curr_].push_back (dot_ ? curr_ : new_);
+
+ if (id_vec_)
+ {
+ id_vec_->push_back (_regexes[curr_].size () - 1);
+ }
+
+ map_id (dot_ ? curr_ : new_, id_, _regexes[curr_].size () - 1);
}
}
@@ -528,6 +597,22 @@
break;
}
}
+
+ bool map_id (std::size_t state, std::size_t id, std::size_t unique_id)
+ {
+ typedef typename unique_id_map::iterator iterator_type;
+
+ unique_id_key key (state, id);
+ iterator_type it = _unique_ids.find (key);
+ if (it != _unique_ids.end ())
+ {
+ (*it).second = unique_id;
+ return false;
+ }
+
+ typedef typename unique_id_map::value_type value_type;
+ return _unique_ids.insert (value_type (key, unique_id)).second;
+ }
};
typedef basic_rules<char> rules;
Modified: trunk/boost/spirit/home/support/detail/lexer/state_machine.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/state_machine.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/state_machine.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -45,6 +45,7 @@
// Current state info
bool end_state;
std::size_t id;
+ std::size_t unique_id;
std::size_t goto_dfa;
std::size_t bol_index;
std::size_t eol_index;
@@ -61,6 +62,7 @@
transition (npos),
end_state (false),
id (npos),
+ unique_id (npos),
goto_dfa (npos),
bol_index (npos),
eol_index (npos),
@@ -77,6 +79,7 @@
transition == rhs_.transition &&
end_state == rhs_.end_state &&
id == rhs_.id &&
+ unique_id == rhs_.unique_id &&
goto_dfa == rhs_.goto_dfa &&
bol_index == rhs_.bol_index &&
eol_index == rhs_.eol_index &&
@@ -197,6 +200,7 @@
_transitions = _data.transitions = ptr_->_transitions.size ();
_data.end_state = ptr_->_end_state;
_data.id = ptr_->_id;
+ _data.unique_id = ptr_->_unique_id;
_data.goto_dfa = ptr_->_state;
_data.bol_index = ptr_->_bol_index;
_data.eol_index = ptr_->_eol_index;
@@ -281,6 +285,7 @@
iter_._transition = 0;
iter_._data.end_state = ptr_->front ()._end_state;
iter_._data.id = ptr_->front ()._id;
+ iter_._data.unique_id = ptr_->front()._unique_id;
iter_._data.goto_dfa = ptr_->front ()._state;
iter_._data.bol_index = ptr_->front ()._bol_index;
iter_._data.eol_index = ptr_->front ()._eol_index;
@@ -368,6 +373,7 @@
state_->_end_state = *read_ptr_ != 0;
state_->_id = *(read_ptr_ + id_index);
+ state_->_unique_id = *(read_ptr_ + unique_id_index);
state_->_state = *(read_ptr_ + state_index);
state_->_bol_index = *(read_ptr_ + bol_index) - 1;
state_->_eol_index = *(read_ptr_ + eol_index) - 1;
Modified: trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp
==============================================================================
--- trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp (original)
+++ trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -6,8 +6,8 @@
// Auto-generated by boost::lexer, do not edit
-#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_MAY_14_2009_13_47_08)
-#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_MAY_14_2009_13_47_08
+#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_MAY_22_2009_09_41_02)
+#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_MAY_22_2009_09_41_02
#include <boost/detail/iterator.hpp>
#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
@@ -22,52 +22,58 @@
"INITIAL"
};
+// this variable defines the number of lexer states
+std::size_t const lexer_state_count = 1;
+
template<typename Iterator>
std::size_t next_token (std::size_t &start_state_, Iterator const& start_,
- Iterator &start_token_, Iterator const& end_)
+ Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)
{
- enum {end_state_index, id_index, state_index, bol_index, eol_index,
- dead_state_index, dfa_offset};
+ enum {end_state_index, id_index, unique_id_index, state_index, bol_index,
+ eol_index, dead_state_index, dfa_offset};
static const std::size_t npos = static_cast<std::size_t>(~0);
- static const std::size_t lookup_[256] = {7, 7, 7, 7, 7, 7, 7, 7,
- 7, 8, 6, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 8, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7};
- static const std::size_t dfa_alphabet_ = 9;
- static const std::size_t dfa_[45] = {0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 3,
- 2, 4, 1, 65536, 0, 0, 0, 0,
- 0, 2, 0, 1, 10, 0, 0, 0,
- 0, 0, 0, 0, 1, 65537, 0, 0,
- 0, 0, 0, 0, 0};
+ static const std::size_t lookup_[256] = {
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 9, 7, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 9, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8 };
+ static const std::size_t dfa_alphabet_ = 10;
+ static const std::size_t dfa_[50] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 3, 2, 4, 1, 65536, 0, 0,
+ 0, 0, 0, 0, 2, 0, 1, 10,
+ 1, 0, 0, 0, 0, 0, 0, 0,
+ 1, 65537, 2, 0, 0, 0, 0, 0,
+ 0, 0 };
if (start_token_ == end_) return 0;
@@ -75,13 +81,13 @@
Iterator curr_ = start_token_;
bool end_state_ = *ptr_ != 0;
std::size_t id_ = *(ptr_ + id_index);
+ std::size_t uid_ = *(ptr_ + unique_id_index);
Iterator end_token_ = start_token_;
while (curr_ != end_)
{
std::size_t const state_ =
- ptr_[lookup_[static_cast<unsigned char>
- (*curr_++)]];
+ ptr_[lookup_[static_cast<unsigned char>(*curr_++)]];
if (state_ == 0) break;
@@ -91,6 +97,7 @@
{
end_state_ = true;
id_ = *(ptr_ + id_index);
+ uid_ = *(ptr_ + unique_id_index);
end_token_ = curr_;
}
}
@@ -103,8 +110,10 @@
else
{
id_ = npos;
+ uid_ = npos;
}
+ unique_id_ = uid_;
return id_;
}
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk