Boost logo

Boost-Commit :

From: hartmut.kaiser_at_[hidden]
Date: 2008-07-15 17:26:52


Author: hkaiser
Date: 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
New Revision: 47462
URL: http://svn.boost.org/trac/boost/changeset/47462

Log:
Spirit: Updating lexer
Added:
   trunk/boost/spirit/home/support/detail/lexer/
   trunk/boost/spirit/home/support/detail/lexer/char_traits.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/consts.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/containers/
   trunk/boost/spirit/home/support/detail/lexer/containers/ptr_list.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/containers/ptr_vector.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/conversion/
   trunk/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/debug.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/file_input.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/generate_cpp.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/generator.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/input.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/parser/
   trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/
   trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/num_token.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_state.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/parser/tree/
   trunk/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/parser/tree/iteration_node.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/parser/tree/leaf_node.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/parser/tree/selection_node.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/parser/tree/sequence_node.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/partition/
   trunk/boost/spirit/home/support/detail/lexer/partition/charset.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/partition/equivset.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/rules.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/runtime_error.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/serialise.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/size_t.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/state_machine.hpp (contents, props changed)
   trunk/boost/spirit/home/support/detail/lexer/string_token.hpp (contents, props changed)

Added: trunk/boost/spirit/home/support/detail/lexer/char_traits.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/char_traits.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,54 @@
+// char_traits.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_CHAR_TRAITS_H
+#define BOOST_LEXER_CHAR_TRAITS_H
+
+// Make sure wchar_t is defined
+#include <string>
+
+namespace boost
+{
+namespace lexer
+{
+template<typename CharT> // Generic character traits: char_type is the lexed type, index_type the type used as an index.
+struct char_traits
+{
+ typedef CharT char_type; // Character type handled by the lexer.
+ typedef CharT index_type; // Same as CharT here; basic_file_input::next () casts characters to this type before indexing its lookup table.
+
+ static index_type call (CharT ch) // Returns ch unchanged (no conversion in the generic case).
+ {
+ return ch;
+ }
+};
+
+template<> // Specialisation for char: indices are taken as unsigned char.
+struct char_traits<char>
+{
+ typedef char char_type; // Character type handled by the lexer.
+ typedef unsigned char index_type; // Unsigned, so a char with a negative value still yields a non-negative index.
+
+ static index_type call (char ch) // Returns ch converted to unsigned char.
+ {
+ return static_cast<index_type>(ch);
+ }
+};
+
+template<> // Specialisation for wchar_t: same members as the primary template, spelled out explicitly.
+struct char_traits<wchar_t>
+{
+ typedef wchar_t char_type; // Character type handled by the lexer.
+ typedef wchar_t index_type; // Index type is wchar_t itself; no signedness conversion is applied.
+
+ static index_type call (wchar_t ch) // Returns ch unchanged.
+ {
+ return ch;
+ }
+};
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/consts.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/consts.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,35 @@
+// consts.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_CONSTS_H
+#define BOOST_LEXER_CONSTS_H
+
+#include <boost/config.hpp>
+#include <boost/integer_traits.hpp>
+#include <boost/spirit/home/support/detail/lexer/size_t.hpp>
+
+namespace boost
+{
+namespace lexer
+{
+ // Offsets within one DFA row: 0 = end state flag, 1 = id, 2 = lex state,
+ // 3 = bol, 4 = eol, 5 = dead_state_index; dfa_offset (6) follows them.
+ enum {end_state_index, id_index, state_index, bol_index, eol_index,
+ dead_state_index, dfa_offset}; // NOTE(review): dfa_offset presumably marks the first per-character column - confirm in generator.hpp.
+
+ const std::size_t max_macro_len = 20; // NOTE(review): presumably the longest macro name accepted - confirm against rules.hpp.
+ const std::size_t num_chars = 256; // generate_cpp () treats a lookup table of this size as the narrow-char case.
+ const std::size_t num_wchar_ts = // The smaller of wchar_t's maximum value and 0x110000.
+ (boost::integer_traits<wchar_t>::const_max < 0x110000) ?
+ boost::integer_traits<wchar_t>::const_max : 0x110000;
+ const std::size_t null_token = static_cast<std::size_t> (~0); // All bits set.
+ const std::size_t bol_token = static_cast<std::size_t> (~1); // null_token - 1.
+ const std::size_t eol_token = static_cast<std::size_t> (~2); // null_token - 2.
+ const std::size_t end_state = 1; // Value 1; usage is not visible in this header.
+ const std::size_t npos = static_cast<std::size_t> (~0); // All bits set (same value as null_token); used as "no index" / end marker.
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/containers/ptr_list.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/containers/ptr_list.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,71 @@
+// ptr_list.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_PTR_LIST_HPP
+#define BOOST_LEXER_PTR_LIST_HPP
+
+#include <list>
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+template<typename Type> // Owning wrapper around std::list<Type *>: every stored pointer is deleted on clear () / destruction.
+class ptr_list
+{
+public:
+ typedef std::list<Type *> list; // Underlying container type.
+
+ ptr_list () // Creates an empty list.
+ {
+ }
+
+ ~ptr_list () // Deletes every element still held.
+ {
+ clear ();
+ }
+
+ list *operator -> () // Direct access to the underlying std::list (e.g. ptr->push_back (p)).
+ {
+ return &_list;
+ }
+
+ const list *operator -> () const // Read-only access to the underlying std::list.
+ {
+ return &_list;
+ }
+
+ list &operator * () // Reference to the underlying std::list.
+ {
+ return _list;
+ }
+
+ const list &operator * () const // Const reference to the underlying std::list.
+ {
+ return _list;
+ }
+
+ void clear () // Deletes each pointee and removes its entry, front to back.
+ {
+ while (!_list.empty ())
+ {
+ delete _list.front (); // Pointers inserted via operator -> are owned by this object.
+ _list.pop_front ();
+ }
+ }
+
+private:
+ list _list; // The owned pointers.
+
+ ptr_list (ptr_list const &); // Declared but not defined: copying would lead to double deletion.
+ ptr_list &operator = (ptr_list const &); // Declared but not defined: no assignment.
+};
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/containers/ptr_vector.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/containers/ptr_vector.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,108 @@
+// ptr_vector.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_PTR_VECTOR_HPP
+#define BOOST_LEXER_PTR_VECTOR_HPP
+
+#include "../size_t.hpp"
+#include <vector>
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+template<typename Type> // Owning wrapper around std::vector<Type *>: every stored pointer is deleted on clear () / destruction.
+class ptr_vector
+{
+public:
+ typedef std::vector<Type *> vector; // Underlying container type.
+
+ ptr_vector () // Creates an empty vector.
+ {
+ }
+
+ ~ptr_vector () // Deletes every element still held.
+ {
+ clear ();
+ }
+
+ vector *operator -> () // Direct access to the underlying std::vector.
+ {
+ return &_vector;
+ }
+
+ const vector *operator -> () const // Read-only access to the underlying std::vector.
+ {
+ return &_vector;
+ }
+
+ vector &operator * () // Reference to the underlying std::vector.
+ {
+ return _vector;
+ }
+
+ const vector &operator * () const // Const reference to the underlying std::vector.
+ {
+ return _vector;
+ }
+
+ Type * &operator [] (const std::size_t index_) // Unchecked element access; returns the stored pointer by reference.
+ {
+ return _vector[index_];
+ }
+
+ Type * const &operator [] (const std::size_t index_) const // Unchecked read-only element access.
+ {
+ return _vector[index_];
+ }
+
+ bool operator == (const ptr_vector &rhs_) const // Deep comparison: equal sizes and equal pointees, element by element.
+ {
+ bool equal_ = _vector.size () == rhs_._vector.size ();
+
+ if (equal_)
+ {
+ typename vector::const_iterator lhs_iter_ = _vector.begin ();
+ typename vector::const_iterator end_ = _vector.end ();
+ typename vector::const_iterator rhs_iter_ = rhs_._vector.begin ();
+
+ for (; equal_ && lhs_iter_ != end_; ++lhs_iter_, ++rhs_iter_) // Stops at the first mismatch.
+ {
+ equal_ = **lhs_iter_ == **rhs_iter_; // Dereferences both pointers; null entries are not checked for.
+ }
+ }
+
+ return equal_;
+ }
+
+ void clear () // Deletes each pointee, then empties the vector.
+ {
+ if (!_vector.empty ()) // front () must not be called on an empty vector.
+ {
+ Type **iter_ = &_vector.front (); // Walk the contiguous storage with raw pointers.
+ Type **end_ = iter_ + _vector.size ();
+
+ for (; iter_ != end_; ++iter_)
+ {
+ delete *iter_;
+ }
+ }
+
+ _vector.clear ();
+ }
+
+private:
+ vector _vector; // The owned pointers.
+
+ ptr_vector (ptr_vector const &); // Declared but not defined: copying would lead to double deletion.
+ ptr_vector &operator = (ptr_vector const &); // Declared but not defined: no assignment.
+};
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,75 @@
+// char_state_machine.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_CHAR_STATE_MACHINE_HPP
+#define BOOST_LEXER_CHAR_STATE_MACHINE_HPP
+
+#include "../consts.hpp"
+#include <map>
+#include "../size_t.hpp"
+#include "../string_token.hpp"
+#include <vector>
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+template<typename CharT> // Plain-data description of a set of state machines whose transitions are keyed by string tokens.
+struct basic_char_state_machine
+{
+ struct state // One state and its outgoing transitions.
+ {
+ typedef basic_string_token<CharT> string_token;
+ typedef std::map<std::size_t, string_token> size_t_string_token_map; // NOTE(review): key is presumably the target state index - confirm where the map is filled.
+ typedef std::pair<std::size_t, string_token> size_t_string_token_pair; // value_type-like pair for inserting into the map.
+
+ bool _end_state; // Defaults to false.
+ std::size_t _id; // Defaults to 0.
+ std::size_t _state; // Defaults to 0.
+ std::size_t _bol_index; // npos by default (no beginning-of-line index set).
+ std::size_t _eol_index; // npos by default (no end-of-line index set).
+ size_t_string_token_map _transitions; // Empty by default.
+
+ state () : // Initialises every scalar member; indices start out as npos.
+ _end_state (false),
+ _id (0),
+ _state (0),
+ _bol_index (npos),
+ _eol_index (npos)
+ {
+ }
+ };
+
+ typedef std::vector<state> state_vector; // The states of one machine.
+ typedef std::vector<state_vector> state_vector_vector; // One state_vector per machine.
+
+ state_vector_vector _sm_vector; // All machines held by this object.
+
+ bool empty () const // True when no machine is stored.
+ {
+ return _sm_vector.empty ();
+ }
+
+ void clear () // Removes every machine.
+ {
+ _sm_vector.clear ();
+ }
+
+ void swap (basic_char_state_machine &csm_) // Exchanges contents with csm_ via std::vector::swap.
+ {
+ _sm_vector.swap (csm_._sm_vector);
+ }
+};
+
+typedef basic_char_state_machine<char> char_state_machine; // Narrow-character instantiation.
+typedef basic_char_state_machine<wchar_t> wchar_state_machine; // Wide-character instantiation.
+
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/debug.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/debug.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,302 @@
+// debug.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_DEBUG_HPP
+#define BOOST_LEXER_DEBUG_HPP
+
+#include <map>
+#include <ostream>
+#include "size_t.hpp"
+#include "state_machine.hpp"
+#include "string_token.hpp"
+#include <vector>
+
+namespace boost
+{
+namespace lexer
+{
+template<typename CharT> // Static helpers that print a basic_state_machine<CharT> in human-readable form.
+class basic_debug
+{
+public:
+ typedef std::basic_ostream<CharT> ostream;
+ typedef std::basic_string<CharT> string;
+ typedef std::vector<std::size_t> size_t_vector; // Not used inside this class.
+
+ static void escape_control_chars (const string &in_, string &out_) // Replaces out_ with in_, control characters, backslash and double quote escaped C-style.
+ {
+ const CharT *ptr_ = in_.c_str ();
+ std::size_t size_ = in_.size (); // Counted loop, so embedded '\0' characters are processed too.
+
+#if defined _MSC_VER && _MSC_VER <= 1200
+ out_.erase ();
+#else
+ out_.clear ();
+#endif
+
+ while (size_)
+ {
+ switch (*ptr_)
+ {
+ case '\0':
+ out_ += '\\';
+ out_ += '0';
+ break;
+ case '\a':
+ out_ += '\\';
+ out_ += 'a';
+ break;
+ case '\b':
+ out_ += '\\';
+ out_ += 'b';
+ break;
+ case 27: // ESC has no single-letter escape; emitted as \x1b.
+ out_ += '\\';
+ out_ += 'x';
+ out_ += '1';
+ out_ += 'b';
+ break;
+ case '\f':
+ out_ += '\\';
+ out_ += 'f';
+ break;
+ case '\n':
+ out_ += '\\';
+ out_ += 'n';
+ break;
+ case '\r':
+ out_ += '\\';
+ out_ += 'r';
+ break;
+ case '\t':
+ out_ += '\\';
+ out_ += 't';
+ break;
+ case '\v':
+ out_ += '\\';
+ out_ += 'v';
+ break;
+ case '\\':
+ out_ += '\\';
+ out_ += '\\';
+ break;
+ case '"':
+ out_ += '\\';
+ out_ += '"';
+ break;
+ default:
+ {
+ if (*ptr_ < 32 && *ptr_ >= 0) // Remaining control characters: \x followed by unpadded hex digits.
+ {
+ stringstream ss_; // std::basic_stringstream; NOTE(review): <sstream> is not included by this header directly.
+
+ out_ += '\\';
+ out_ += 'x';
+ ss_ << std::hex <<
+ static_cast<std::size_t> (*ptr_);
+ out_ += ss_.str ();
+ }
+ else
+ {
+ out_ += *ptr_; // Everything else is copied through unchanged.
+ }
+
+ break;
+ }
+ }
+
+ ++ptr_;
+ --size_;
+ }
+ }
+
+ static void dump (const basic_state_machine<CharT> &state_machine_, ostream &stream_) // Writes a listing of every state of every DFA to stream_.
+ {
+ typename basic_state_machine<CharT>::iterator iter_ =
+ state_machine_.begin ();
+ typename basic_state_machine<CharT>::iterator end_ = // end_ is never read below; the loops are bounded by counts instead.
+ state_machine_.end ();
+
+ for (std::size_t dfa_ = 0, dfas_ = state_machine_.size ();
+ dfa_ < dfas_; ++dfa_)
+ {
+ const std::size_t states_ = iter_->states; // State count, read from the iterator at the start of each DFA.
+
+ for (std::size_t i_ = 0; i_ < states_; ++i_)
+ {
+ state (stream_);
+ stream_ << i_ << std::endl;
+
+ if (iter_->end_state) // Accepting state: print its id and the DFA it switches to.
+ {
+ end_state (stream_);
+ stream_ << iter_->id;
+ dfa (stream_);
+ stream_ << iter_->goto_dfa;
+ stream_ << std::endl;
+ }
+
+ if (iter_->bol_index != npos)
+ {
+ bol (stream_);
+ stream_ << iter_->bol_index << std::endl;
+ }
+
+ if (iter_->eol_index != npos)
+ {
+ eol (stream_);
+ stream_ << iter_->eol_index << std::endl;
+ }
+
+ const std::size_t transitions_ = iter_->transitions;
+
+ if (transitions_ == 0) // No transitions: still step once so the iterator moves to the next state.
+ {
+ ++iter_;
+ }
+
+ for (std::size_t t_ = 0; t_ < transitions_; ++t_) // One iterator step per transition.
+ {
+ std::size_t goto_state_ = iter_->goto_state;
+
+ if (iter_->token.any ())
+ {
+ any (stream_);
+ }
+ else
+ {
+ open_bracket (stream_);
+
+ if (iter_->token._negated)
+ {
+ negated (stream_);
+ }
+
+ string charset_;
+ CharT c_ = 0;
+
+ escape_control_chars (iter_->token._charset,
+ charset_);
+ c_ = *charset_.c_str (); // First character of the escaped set ('\0' if empty).
+
+ if (!iter_->token._negated &&
+ (c_ == '^' || c_ == ']')) // Only a leading '^' or ']' is backslash-escaped.
+ {
+ stream_ << '\\';
+ }
+
+ stream_ << charset_;
+ close_bracket (stream_);
+ }
+
+ stream_ << goto_state_ << std::endl;
+ ++iter_;
+ }
+
+ stream_ << std::endl; // Blank line between states.
+ }
+ }
+ }
+
+protected:
+ typedef std::basic_stringstream<CharT> stringstream;
+
+ // The overload pairs below write one fixed label each; overloading on the
+ static void state (std::basic_ostream<char> &stream_) // stream type selects the narrow or wide string literal.
+ {
+ stream_ << "State: ";
+ }
+
+ static void state (std::basic_ostream<wchar_t> &stream_)
+ {
+ stream_ << L"State: ";
+ }
+
+ static void bol (std::basic_ostream<char> &stream_) // Label printed before a bol_index value.
+ {
+ stream_ << " BOL -> ";
+ }
+
+ static void bol (std::basic_ostream<wchar_t> &stream_)
+ {
+ stream_ << L" BOL -> ";
+ }
+
+ static void eol (std::basic_ostream<char> &stream_) // Label printed before an eol_index value.
+ {
+ stream_ << " EOL -> ";
+ }
+
+ static void eol (std::basic_ostream<wchar_t> &stream_)
+ {
+ stream_ << L" EOL -> ";
+ }
+
+ static void end_state (std::basic_ostream<char> &stream_) // Label printed before the id of an end state.
+ {
+ stream_ << " END STATE, Id = ";
+ }
+
+ static void end_state (std::basic_ostream<wchar_t> &stream_)
+ {
+ stream_ << L" END STATE, Id = ";
+ }
+
+ static void any (std::basic_ostream<char> &stream_) // Label for a transition whose token matches any character.
+ {
+ stream_ << " . -> ";
+ }
+
+ static void any (std::basic_ostream<wchar_t> &stream_)
+ {
+ stream_ << L" . -> ";
+ }
+
+ static void open_bracket (std::basic_ostream<char> &stream_) // Opens a character-set transition.
+ {
+ stream_ << " [";
+ }
+
+ static void open_bracket (std::basic_ostream<wchar_t> &stream_)
+ {
+ stream_ << L" [";
+ }
+
+ static void negated (std::basic_ostream<char> &stream_) // Marks a negated character set.
+ {
+ stream_ << "^";
+ }
+
+ static void negated (std::basic_ostream<wchar_t> &stream_)
+ {
+ stream_ << L"^";
+ }
+
+ static void close_bracket (std::basic_ostream<char> &stream_) // Closes a character-set transition.
+ {
+ stream_ << "] -> ";
+ }
+
+ static void close_bracket (std::basic_ostream<wchar_t> &stream_)
+ {
+ stream_ << L"] -> ";
+ }
+
+ static void dfa (std::basic_ostream<char> &stream_) // Label printed before goto_dfa.
+ {
+ stream_ << ", dfa = ";
+ }
+
+ static void dfa (std::basic_ostream<wchar_t> &stream_)
+ {
+ stream_ << L", dfa = ";
+ }
+};
+
+typedef basic_debug<char> debug; // Narrow-character instantiation.
+typedef basic_debug<wchar_t> wdebug; // Wide-character instantiation.
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/file_input.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/file_input.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,456 @@
+// file_input.hpp
+// Copyright (c) 2008 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_FILE_INPUT
+#define BOOST_LEXER_FILE_INPUT
+
+#include "char_traits.hpp"
+#include <fstream>
+#include "size_t.hpp"
+#include "state_machine.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+template<typename CharT, typename Traits = char_traits<CharT> >
+class basic_file_input
+{
+public:
+ class iterator // Token iterator: each increment asks the owning basic_file_input for the next token.
+ {
+ public:
+#if defined _MSC_VER && _MSC_VER <= 1200
+ friend basic_file_input;
+#else
+ friend class basic_file_input;
+#endif
+
+ struct data // The token description returned by operator * and operator ->.
+ {
+ std::size_t id; // Value returned by basic_file_input::next (); 0 marks the end position.
+ const CharT *start; // Start of the current token inside the input buffer (0 at end).
+ const CharT *end; // One past the end of the current token (0 at end).
+ std::size_t state; // Start state handed to the multi-DFA next (); npos at end.
+
+ // Construct in end() state.
+ data () :
+ id (0), start (0), end (0), // Pointers are nulled too, so operator == never reads indeterminate values.
+ state (npos)
+ {
+ }
+
+ bool operator == (const data &rhs_) const // Member-wise comparison of all four fields.
+ {
+ return id == rhs_.id && start == rhs_.start &&
+ end == rhs_.end && state == rhs_.state;
+ }
+ };
+
+ iterator () : // Detached iterator; _input must be set (see begin () / end ()) before incrementing.
+ _input (0)
+ {
+ }
+
+ bool operator == (const iterator &rhs_) const // Compares token data only; _input is not part of the comparison.
+ {
+ return _data == rhs_._data;
+ }
+
+ bool operator != (const iterator &rhs_) const
+ {
+ return !(*this == rhs_);
+ }
+
+ data &operator * () // Current token.
+ {
+ return _data;
+ }
+
+ data *operator -> () // Current token, for member access.
+ {
+ return &_data;
+ }
+
+ // Let compiler generate operator = ().
+
+ // prefix version
+ iterator &operator ++ ()
+ {
+ next_token ();
+ return *this;
+ }
+
+ // postfix version
+ iterator operator ++ (int)
+ {
+ iterator iter_ = *this; // Copy holds the token as it was before advancing.
+
+ next_token ();
+ return iter_;
+ }
+
+ void next_token () // Lexes the next token into _data; resets _data to the end () state when next () returns 0.
+ {
+ _data.start = _data.end; // Passed to next () by reference, which overwrites both.
+
+ if (_input->_state_machine->_dfa->size () == 1) // Single DFA: use the overload that takes raw table pointers.
+ {
+ _data.id = _input->next (&_input->_state_machine->_lookup->
+ front ()->front (), _input->_state_machine->_dfa_alphabet.
+ front (), &_input->_state_machine->_dfa->front ()->
+ front (), _data.start, _data.end);
+ }
+ else // Several DFAs: next () selects the tables using _data.state and updates it.
+ {
+ _data.id = _input->next (*_input->_state_machine, _data.state,
+ _data.start, _data.end);
+ }
+
+ if (_data.id == 0) // No more tokens.
+ {
+ _data.start = 0;
+ _data.end = 0;
+ // Ensure current state matches that returned by end().
+ _data.state = npos;
+ }
+ }
+
+ private:
+ // Not owner (obviously!)
+ basic_file_input *_input;
+ data _data; // Token most recently produced by next_token ().
+ };
+
+#if defined _MSC_VER && _MSC_VER <= 1200
+ friend iterator;
+#else
+ friend class iterator;
+#endif
+
+ // Make it explicit that we are NOT taking a copy of state_machine_! Both pointers must outlive this object.
+ basic_file_input (const basic_state_machine<CharT> *state_machine_,
+ std::basic_ifstream<CharT> *is_,
+ const std::streamsize buffer_size_ = 4096, // Initial buffer length, in characters.
+ const std::streamsize buffer_increment_ = 1024) : // Growth step used by reload_buffer ().
+ _state_machine (state_machine_),
+ _stream (is_), // Written out of declaration order; members are still initialised in declaration order.
+ _buffer_size (buffer_size_),
+ _buffer_increment (buffer_increment_),
+ _buffer (_buffer_size, '!') // _buffer_size is declared before _buffer, so it is already set here.
+ {
+ _start_buffer = &_buffer.front (); // NOTE(review): front () on an empty vector is undefined - assumes buffer_size_ > 0.
+ _end_buffer = _start_buffer + _buffer.size ();
+ _start_token = _end_buffer; // Token pointers start at the buffer end, so the first next () call
+ _end_token = _end_buffer; // finds curr_ >= _end_buffer and loads the buffer from the stream.
+ }
+
+ iterator begin () // Returns an iterator bound to this input and already advanced to the first token.
+ {
+ iterator iter_;
+
+ iter_._input = this;
+ iter_._data.id = npos;
+ iter_._data.start = 0;
+ iter_._data.end = 0;
+ iter_._data.state = 0; // Lexing starts in state 0.
+ ++iter_; // Reads the first token; leaves iter_ equal to end () if there is none.
+ return iter_;
+ }
+
+ iterator end () // Returns the past-the-end iterator: id 0, null pointers, state npos.
+ {
+ iterator iter_; // data () already sets id to 0 and state to npos.
+
+ iter_._input = this;
+ iter_._data.start = 0;
+ iter_._data.end = 0;
+ return iter_;
+ }
+
+ void flush () // Marks everything currently buffered as consumed, then reloads the buffer from the stream.
+ {
+ // This temporary is mandatory, otherwise the
+ // pointer calculations won't work!
+ const CharT *temp_ = _end_buffer; // reload_buffer () takes its first argument by reference and rewrites it.
+
+ _start_token = _end_token = _end_buffer; // No pending token, so reload_buffer () refills from the buffer start.
+ reload_buffer (temp_, true, _end_token); // end_state_ == true makes it rebase _end_token as well.
+ }
+
+private:
+ typedef std::basic_istream<CharT> istream;
+ typedef std::vector<CharT> buffer; // NOTE(review): <vector> is not included by this header directly.
+
+ const basic_state_machine<CharT> *_state_machine; // Not owned.
+ const std::streamsize _buffer_size; // Initial buffer length given to the constructor.
+ const std::streamsize _buffer_increment; // Number of characters added each time the buffer grows.
+
+ buffer _buffer; // Storage for buffered input.
+ CharT *_start_buffer; // &_buffer.front (); refreshed after every resize.
+ istream *_stream; // Not owned.
+ const CharT *_start_token; // Start of the token currently being matched.
+ const CharT *_end_token; // End of the last token returned; the next token starts here.
+ CharT *_end_buffer; // One past the last valid character in _buffer.
+
+ std::size_t next (const basic_state_machine<CharT> &state_machine_, // Multi-DFA scan: returns the token id, 0 when input is exhausted, npos when nothing matched.
+ std::size_t &start_state_, const CharT * &start_, const CharT * &end_) // start_state_ selects the DFA and is updated on a match; start_/end_ receive the token range.
+ {
+ _start_token = _end_token; // Continue where the previous token ended.
+
+again: // Re-entered when the matched rule has id 0, using the possibly changed start_state_.
+ const std::size_t * lookup_ = &state_machine_._lookup[start_state_]->
+ front ();
+ std::size_t dfa_alphabet_ = state_machine_._dfa_alphabet[start_state_]; // Row width of this DFA's table.
+ const std::size_t *dfa_ = &state_machine_._dfa[start_state_]->front ();
+ const std::size_t *ptr_ = dfa_ + dfa_alphabet_; // Scanning begins in row 1.
+ const CharT *curr_ = _start_token;
+ bool end_state_ = *ptr_ != 0; // Column 0 (end_state_index) is non-zero for accepting rows.
+ std::size_t id_ = *(ptr_ + id_index);
+ const CharT *end_token_ = curr_; // End of the longest match found so far.
+
+ for (;;)
+ {
+ if (curr_ >= _end_buffer) // Buffered input used up: fetch more (pointers are rebased by reload_buffer).
+ {
+ if (!reload_buffer (curr_, end_state_, end_token_))
+ {
+ // EOF
+ break;
+ }
+ }
+
+ const std::size_t BOL_state_ = ptr_[bol_index];
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ if (BOL_state_ && (_start_token == _start_buffer || // Beginning of line: token starts the buffer or follows '\n'.
+ *(_start_token - 1) == '\n'))
+ {
+ ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
+ }
+ else if (EOL_state_ && *curr_ == '\n') // End of line: taken without consuming the '\n'.
+ {
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+ }
+ else
+ {
+ const std::size_t state_ = // Consume one character: lookup_ maps it to a column of the current row.
+ ptr_[lookup_[static_cast<typename Traits::index_type> (*curr_++)]];
+
+ if (state_ == 0) // A transition value of 0 means there is no transition; stop scanning.
+ {
+ break;
+ }
+
+ ptr_ = &dfa_[state_ * dfa_alphabet_];
+ }
+
+ if (*ptr_) // Accepting row: remember this (longer) match and keep going.
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ start_state_ = *(ptr_ + state_index);
+ end_token_ = curr_;
+ }
+ }
+
+ if (_start_token >= _end_buffer)
+ {
+ // No more tokens...
+ return 0;
+ }
+
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ if (EOL_state_ && curr_ == end_) // NOTE(review): end_ is the caller's out-parameter (iterator::next_token passes the previous token's end), not the end of buffered input - confirm this is the intended comparison.
+ {
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+
+ if (*ptr_)
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ start_state_ = *(ptr_ + state_index);
+ end_token_ = curr_;
+ }
+ }
+
+ if (end_state_)
+ {
+ // return longest match
+ _end_token = end_token_;
+
+ if (id_ == 0) goto again; // Id 0 is not reported to the caller; scan again from the current _start_token.
+ }
+ else
+ {
+ // No match causes char to be skipped
+ _end_token = _start_token + 1;
+ id_ = npos;
+ }
+
+ start_ = _start_token;
+ end_ = _end_token;
+ return id_;
+ }
+
+ std::size_t next (const std::size_t * const lookup_, // Single-DFA scan: returns the token id, 0 when input is exhausted, npos when nothing matched.
+ const std::size_t dfa_alphabet_, const std::size_t * const dfa_, // lookup_ maps characters to columns; dfa_alphabet_ is the row width of dfa_.
+ const CharT * &start_, const CharT * &end_) // Receive the token range on return.
+ {
+ _start_token = _end_token; // Continue where the previous token ended.
+
+ const std::size_t *ptr_ = dfa_ + dfa_alphabet_; // Scanning begins in row 1.
+ const CharT *curr_ = _start_token;
+ bool end_state_ = *ptr_ != 0; // Column 0 (end_state_index) is non-zero for accepting rows.
+ std::size_t id_ = *(ptr_ + id_index); // Initialised exactly once (no self-assignment inside the initialiser).
+ const CharT *end_token_ = curr_; // End of the longest match found so far.
+
+ for (;;)
+ {
+ if (curr_ >= _end_buffer) // Buffered input used up: fetch more (pointers are rebased by reload_buffer).
+ {
+ if (!reload_buffer (curr_, end_state_, end_token_))
+ {
+ // EOF
+ break;
+ }
+ }
+
+ const std::size_t BOL_state_ = ptr_[bol_index];
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ if (BOL_state_ && (_start_token == _start_buffer || // Beginning of line: token starts the buffer or follows '\n'.
+ *(_start_token - 1) == '\n'))
+ {
+ ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
+ }
+ else if (EOL_state_ && *curr_ == '\n') // End of line: taken without consuming the '\n'.
+ {
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+ }
+ else
+ {
+ const std::size_t state_ = // Consume one character: lookup_ maps it to a column of the current row.
+ ptr_[lookup_[static_cast<typename Traits::index_type> (*curr_++)]];
+
+ if (state_ == 0) // A transition value of 0 means there is no transition; stop scanning.
+ {
+ break;
+ }
+
+ ptr_ = &dfa_[state_ * dfa_alphabet_];
+ }
+
+ if (*ptr_) // Accepting row: remember this (longer) match and keep going.
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ end_token_ = curr_;
+ }
+ }
+
+ if (_start_token >= _end_buffer)
+ {
+ // No more tokens...
+ return 0;
+ }
+
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ if (EOL_state_ && curr_ == end_) // NOTE(review): end_ is the caller's out-parameter (iterator::next_token passes the previous token's end), not the end of buffered input - confirm this is the intended comparison.
+ {
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+
+ if (*ptr_)
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ end_token_ = curr_;
+ }
+ }
+
+ if (end_state_)
+ {
+ // return longest match
+ _end_token = end_token_;
+ }
+ else
+ {
+ // No match causes char to be skipped
+ _end_token = _start_token + 1;
+ id_ = npos;
+ }
+
+ start_ = _start_token;
+ end_ = _end_token;
+ return id_;
+ }
+
+ bool reload_buffer (const CharT * &curr_, const bool end_state_, // Reads more input; returns false, changing nothing, if the stream is already at EOF.
+ const CharT * &end_token_) // curr_ is always rebased onto the new buffer layout; end_token_ only when end_state_ is true.
+ {
+ bool success_ = !_stream->eof ();
+
+ if (success_)
+ {
+ const CharT *old_start_token_ = _start_token; // Kept so offsets relative to the token start can be re-applied below.
+ std::size_t old_size_ = _buffer.size ();
+ std::size_t count_ = 0; // Characters actually read.
+
+ if (_start_token - 1 == _start_buffer) // Token already sits at offset 1, so shifting cannot free any space.
+ {
+ // Run out of buffer space, so increase.
+ _buffer.resize (old_size_ + _buffer_increment, '!');
+ _start_buffer = &_buffer.front (); // resize () may reallocate; refresh every pointer into the buffer.
+ _start_token = _start_buffer + 1;
+ _stream->read (_start_buffer + old_size_,
+ _buffer_increment);
+ count_ = _stream->gcount ();
+ _end_buffer = _start_buffer + old_size_ + count_;
+ }
+ else if (_start_token < _end_buffer) // Unfinished token: slide it, plus the one character before it, to the buffer start.
+ {
+ const std::size_t len_ = _end_buffer - _start_token; // NOTE(review): if _start_token == _start_buffer the move below reads one element before the buffer - confirm that state is unreachable.
+
+ std::char_traits<CharT>::move (_start_buffer, _start_token - 1, len_ + 1); // Overlap-safe: both ranges lie in _buffer and may overlap, which memcpy does not permit.
+ _stream->read (_start_buffer + len_ + 1,
+ static_cast<std::streamsize> (_buffer.size () - len_ - 1));
+ count_ = _stream->gcount ();
+ _start_token = _start_buffer + 1; // Offset 1 keeps the preceding character available for the BOL test in next ().
+ _end_buffer = _start_buffer + len_ + 1 + count_;
+ }
+ else // No pending token: refill the whole buffer from its start.
+ {
+ _stream->read (_start_buffer, static_cast<std::streamsize>
+ (_buffer.size ()));
+ count_ = _stream->gcount ();
+ _start_token = _start_buffer;
+ _end_buffer = _start_buffer + count_;
+ }
+
+ if (end_state_) // A match end was recorded: keep it at the same offset from the token start.
+ {
+ end_token_ = _start_token +
+ (end_token_ - old_start_token_);
+ }
+
+ curr_ = _start_token + (curr_ - old_start_token_); // Same offset from the token start as before the reload.
+ }
+
+ return success_;
+ }
+
+ // Disallow copying of buffer
+ basic_file_input (const basic_file_input &);
+ const basic_file_input &operator = (const basic_file_input &);
+};
+
+typedef basic_file_input<char> file_input;
+typedef basic_file_input<wchar_t> wfile_input;
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/generate_cpp.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/generate_cpp.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,537 @@
+// generate_cpp.hpp
+// Copyright (c) 2008 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_GENERATE_CPP_CODE_HPP
+#define BOOST_LEXER_GENERATE_CPP_CODE_HPP
+
+#include <cctype>
+#include "char_traits.hpp"
+#include "consts.hpp"
+#include <iostream>
+#include <boost/detail/iterator.hpp>
+#include "runtime_error.hpp"
+#include "size_t.hpp"
+#include "state_machine.hpp"
+#include <string>
+#include <vector>
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+// Writes the size_ values starting at data_ to os_ as a comma separated
+// initialiser list, starting a new row after every eighth value.
+inline void cpp_dump_table (std::ostream &os_, const std::size_t *data_,
+    const std::size_t size_)
+{
+    for (std::size_t i_ = 0; i_ < size_; ++i_)
+    {
+        if (i_ != 0)
+        {
+            // '\n' rather than std::endl: no need to flush on every row.
+            os_ << (i_ % 8 == 0 ? ",\n " : ", ");
+        }
+
+        os_ << data_[i_];
+    }
+}
+
+// Emits the generated statements that consume one input character and
+// follow the matching transition. narrow_ selects the variant that casts
+// the character to unsigned char before indexing the lookup table.
+inline void cpp_dump_next_state (std::ostream &os_, const bool narrow_)
+{
+    os_ << " const std::size_t state_ =\n";
+
+    if (narrow_)
+    {
+        os_ << " ptr_[lookup_[static_cast<unsigned char>\n";
+        os_ << " (*curr_++)]];\n";
+    }
+    else
+    {
+        os_ << " ptr_[lookup_[*curr_++]];\n";
+    }
+
+    os_ << '\n';
+    os_ << " if (state_ == 0) break;\n";
+    os_ << '\n';
+    os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+}
+
+// Emits the generated statements that record an accepting state.
+// multi_state_ adds the assignment that updates start_state_.
+inline void cpp_dump_end_state (std::ostream &os_, const bool multi_state_)
+{
+    os_ << " if (*ptr_)\n";
+    os_ << " {\n";
+    os_ << " end_state_ = true;\n";
+    os_ << " id_ = *(ptr_ + id_index);\n";
+
+    if (multi_state_)
+    {
+        os_ << " start_state_ = *(ptr_ + state_index);\n";
+    }
+
+    os_ << " end_token_ = curr_;\n";
+    os_ << " }\n";
+}
+}
+
+// Writes to os_ the C++ source of a stand-alone tokenising function,
+// called name_, that embeds the tables of the state machine sm_.
+//
+// use_pointers_        - declare the iterator parameters as const char * /
+//                        const wchar_t * instead of the Iterator template
+//                        parameter.
+// skip_unknown_        - make the generated function step over a character
+//                        that matches nothing.
+// optimise_parameters_ - when true, start_state_ and start_ are only part
+//                        of the generated signature if sm_ needs them.
+//
+// The include guard of the generated code is built from name_ plus the
+// __DATE__ and __TIME__ of the translation unit that instantiates this
+// template.
+//
+// Throws runtime_error if sm_ holds no lookup tables.
+template<typename CharT>
+void generate_cpp (const basic_state_machine<CharT> &sm_, std::ostream &os_,
+    const bool use_pointers_ = false, const bool skip_unknown_ = true,
+    const bool optimise_parameters_ = true, const char *name_ = "next_token")
+{
+    if (sm_._lookup->size () == 0)
+    {
+        throw runtime_error ("Cannot generate code from an empty state machine");
+    }
+
+    const std::size_t lookups_ = sm_._lookup->front ()->size ();
+    const std::size_t dfas_ = sm_._dfa->size ();
+    const bool multi_state_ = dfas_ > 1;
+    const bool narrow_ = lookups_ == 256;
+    const bool bol_ = sm_._seen_BOL_assertion;
+    const bool eol_ = sm_._seen_EOL_assertion;
+    const char *iterator_ = "Iterator &";
+
+    if (use_pointers_)
+    {
+        iterator_ = narrow_ ? "const char *" : "const wchar_t *";
+    }
+
+    // Spelling of a read-only iterator parameter (used for start_ and end_).
+    std::string const_iterator_;
+
+    if (use_pointers_)
+    {
+        const_iterator_ = iterator_;
+        const_iterator_ += " const ";
+    }
+    else
+    {
+        const_iterator_ = "const ";
+        const_iterator_ += iterator_;
+    }
+
+    // Include guard: <name>_<date>_<time>, spaces turned into underscores,
+    // colons dropped, everything upper-cased.
+    std::string stamp_ (__DATE__);
+    std::string upper_name_ (name_);
+
+    stamp_ += '_';
+    stamp_ += __TIME__;
+    upper_name_ += '_';
+
+    for (std::string::size_type i_ = 0, size_ = stamp_.size ();
+        i_ < size_; ++i_)
+    {
+        if (stamp_[i_] == ' ')
+        {
+            upper_name_ += '_';
+        }
+        else if (stamp_[i_] != ':')
+        {
+            upper_name_ += stamp_[i_];
+        }
+    }
+
+    for (std::string::size_type i_ = 0, size_ = upper_name_.size ();
+        i_ < size_; ++i_)
+    {
+        // toupper is only defined for values representable as unsigned char.
+        upper_name_[i_] = static_cast<char> (std::toupper
+            (static_cast<unsigned char> (upper_name_[i_])));
+    }
+
+    os_ << "#ifndef " << upper_name_ << '\n';
+    os_ << "#define " << upper_name_ << '\n';
+    os_ << "// Copyright (c) 2008 Ben Hanson\n";
+    os_ << "//\n";
+    os_ << "// Distributed under the Boost Software License, "
+        "Version 1.0. (See accompanying\n";
+    os_ << "// file licence_1_0.txt or copy at "
+        "http://www.boost.org/LICENSE_1_0.txt)\n\n";
+    os_ << "// Auto-generated by boost::lexer\n";
+    os_ << "template<typename Iterator>\n";
+    os_ << "std::size_t " << name_ << " (";
+
+    // Parameter list.
+    if (multi_state_ || !optimise_parameters_)
+    {
+        os_ << "std::size_t &start_state_, ";
+    }
+
+    if (bol_ || !optimise_parameters_)
+    {
+        os_ << const_iterator_ << "start_, ";
+    }
+
+    if (multi_state_ || bol_ || !optimise_parameters_)
+    {
+        os_ << "\n ";
+    }
+
+    os_ << iterator_;
+
+    if (use_pointers_)
+    {
+        os_ << " &";
+    }
+
+    os_ << "start_token_, ";
+    os_ << const_iterator_ << "end_)\n";
+    os_ << "{\n";
+    os_ << " enum {end_state_index, id_index, state_index, bol_index, "
+        "eol_index,\n";
+    os_ << " dead_state_index, dfa_offset};\n";
+    os_ << " static const std::size_t npos = static_cast"
+        "<std::size_t>(~0);\n";
+
+    // Tables. Every lookup table is written with lookups_ entries, the
+    // size of the first one.
+    if (multi_state_)
+    {
+        for (std::size_t state_ = 0; state_ < dfas_; ++state_)
+        {
+            const std::size_t *lookup_ = &sm_._lookup[state_]->front ();
+            const std::size_t *dfa_ = &sm_._dfa[state_]->front ();
+            const std::size_t dfa_size_ = sm_._dfa[state_]->size ();
+
+            os_ << " static const std::size_t lookup" << state_ << "_[" <<
+                lookups_ << "] = {";
+            detail::cpp_dump_table (os_, lookup_, lookups_);
+            os_ << "};\n";
+            os_ << " static const std::size_t dfa" << state_ << "_[" <<
+                dfa_size_ << "] = {";
+            detail::cpp_dump_table (os_, dfa_, dfa_size_);
+            os_ << "};\n";
+        }
+
+        const std::size_t count_ = sm_._dfa_alphabet.size ();
+        std::size_t i_ = 1;
+
+        os_ << " static const std::size_t *lookup_arr_[" << count_ <<
+            "] = {";
+        os_ << "lookup0_";
+
+        for (i_ = 1; i_ < count_; ++i_)
+        {
+            os_ << ", " << "lookup" << i_ << "_";
+        }
+
+        os_ << "};\n";
+        os_ << " static const std::size_t dfa_alphabet_arr_[" << count_ <<
+            "] = {";
+        os_ << sm_._dfa_alphabet.front ();
+
+        for (i_ = 1; i_ < count_; ++i_)
+        {
+            os_ << ", " << sm_._dfa_alphabet[i_];
+        }
+
+        os_ << "};\n";
+        os_ << " static const std::size_t *dfa_arr_[" << count_ <<
+            "] = {";
+        os_ << "dfa0_";
+
+        for (i_ = 1; i_ < count_; ++i_)
+        {
+            os_ << ", " << "dfa" << i_ << "_";
+        }
+
+        os_ << "};\n";
+    }
+    else
+    {
+        const std::size_t *lookup_ = &sm_._lookup[0]->front ();
+        const std::size_t *dfa_ = &sm_._dfa[0]->front ();
+        const std::size_t dfa_size_ = sm_._dfa[0]->size ();
+
+        os_ << " static const std::size_t lookup_[";
+        os_ << sm_._lookup[0]->size () << "] = {";
+        detail::cpp_dump_table (os_, lookup_, lookups_);
+        os_ << "};\n";
+        os_ << " static const std::size_t dfa_alphabet_ = " <<
+            sm_._dfa_alphabet.front () << ";\n";
+        os_ << " static const std::size_t dfa_[" << dfa_size_ << "] = {";
+        detail::cpp_dump_table (os_, dfa_, dfa_size_);
+        os_ << "};\n";
+    }
+
+    os_ << "\n if (start_token_ == end_) return 0;\n\n";
+
+    if (multi_state_)
+    {
+        os_ << "again:\n";
+        os_ << " const std::size_t * lookup_ = lookup_arr_[start_state_];\n";
+        os_ << " std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n";
+        os_ << " const std::size_t *dfa_ = dfa_arr_[start_state_];\n";
+    }
+
+    os_ << " const std::size_t *ptr_ = dfa_ + dfa_alphabet_;\n";
+    os_ << " Iterator curr_ = start_token_;\n";
+    os_ << " bool end_state_ = *ptr_ != 0;\n";
+    os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
+    os_ << " Iterator end_token_ = start_token_;\n";
+    os_ << '\n';
+    os_ << " while (curr_ != end_)\n";
+    os_ << " {\n";
+
+    if (bol_)
+    {
+        os_ << " const std::size_t BOL_state_ = ptr_[bol_index];\n";
+    }
+
+    if (eol_)
+    {
+        os_ << " const std::size_t EOL_state_ = ptr_[eol_index];\n";
+    }
+
+    if (bol_ || eol_)
+    {
+        os_ << '\n';
+    }
+
+    // Transition code: the assertion tests (if any were seen) come first
+    // and the ordinary character transition becomes their else branch.
+    if (bol_)
+    {
+        os_ << " if (BOL_state_ && (start_token_ == start_ ||\n";
+        os_ << " *(start_token_ - 1) == '\\n'))\n";
+        os_ << " {\n";
+        os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
+        os_ << " }\n";
+    }
+
+    if (eol_)
+    {
+        os_ << (bol_ ? " else if (EOL_state_ && *curr_ == '\\n')\n" :
+            " if (EOL_state_ && *curr_ == '\\n')\n");
+        os_ << " {\n";
+        os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
+        os_ << " }\n";
+    }
+
+    if (bol_ || eol_)
+    {
+        os_ << " else\n";
+        os_ << " {\n";
+    }
+
+    detail::cpp_dump_next_state (os_, narrow_);
+
+    if (bol_ || eol_)
+    {
+        os_ << " }\n";
+    }
+
+    os_ << '\n';
+    detail::cpp_dump_end_state (os_, multi_state_);
+    os_ << " }\n";
+    os_ << '\n';
+
+    if (eol_)
+    {
+        // An EOL assertion is also tried once the input is exhausted.
+        os_ << " const std::size_t EOL_state_ = ptr_[eol_index];\n";
+        os_ << '\n';
+        os_ << " if (EOL_state_ && curr_ == end_)\n";
+        os_ << " {\n";
+        os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
+        os_ << '\n';
+        detail::cpp_dump_end_state (os_, multi_state_);
+        os_ << " }\n";
+        os_ << '\n';
+    }
+
+    os_ << " if (end_state_)\n";
+    os_ << " {\n";
+    os_ << " // return longest match\n";
+    os_ << " start_token_ = end_token_;\n";
+
+    if (multi_state_)
+    {
+        os_ << '\n';
+        os_ << " if (id_ == 0) goto again;\n";
+    }
+
+    os_ << " }\n";
+    os_ << " else\n";
+    os_ << " {\n";
+
+    if (skip_unknown_)
+    {
+        os_ << " // No match causes char to be skipped\n";
+        os_ << " ++start_token_;\n";
+    }
+
+    os_ << " id_ = npos;\n";
+    os_ << " }\n";
+    os_ << '\n';
+    os_ << " return id_;\n";
+    os_ << "}\n";
+    os_ << "\n#endif\n";
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/generator.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/generator.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,824 @@
+// generator.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_GENERATOR_HPP
+#define BOOST_LEXER_GENERATOR_HPP
+
+#include "char_traits.hpp"
+// memcmp()
+#include <cstring>
+#include "partition/charset.hpp"
+#include "partition/equivset.hpp"
+#include <memory>
+#include "parser/tree/node.hpp"
+#include "parser/parser.hpp"
+#include "containers/ptr_list.hpp"
+#include "rules.hpp"
+#include "state_machine.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+template<typename CharT, typename Traits = char_traits<CharT> >
+class basic_generator
+{
+public:
+ typedef typename basic_state_machine<CharT>::size_t_vector size_t_vector;
+ typedef basic_rules<CharT> rules;
+
+ // Rebuilds state_machine_ from rules_: one lookup table and one DFA per
+ // lexer state. States that have no regexes keep an empty DFA.
+ static void build (const rules &rules_, basic_state_machine<CharT> &state_machine_)
+ {
+     const std::size_t states_ = rules_.statemap ().size ();
+     node_ptr_vector node_ptr_vector_;
+
+     state_machine_.clear ();
+
+     // Allocate the (still empty) per-state tables. Each slot is pushed as
+     // a null pointer first and only then filled in.
+     for (std::size_t count_ = 0; count_ < states_; ++count_)
+     {
+         state_machine_._lookup->push_back (0);
+         state_machine_._lookup->back () = new size_t_vector;
+         state_machine_._dfa_alphabet.push_back (0);
+         state_machine_._dfa->push_back (0);
+         state_machine_._dfa->back () = new size_t_vector;
+     }
+
+     const std::size_t tables_ = state_machine_._lookup->size ();
+
+     for (std::size_t state_ = 0; state_ < tables_; ++state_)
+     {
+         state_machine_._lookup[state_]->resize (sizeof (CharT) == 1 ?
+             num_chars : num_wchar_ts, dead_state_index);
+
+         if (rules_.regexes ()[state_].empty ())
+         {
+             continue;
+         }
+
+         // vector mapping token indexes to partitioned token index sets
+         index_set_vector set_mapping_;
+         // syntax tree
+         detail::node *root_ = build_tree (rules_, state_,
+             node_ptr_vector_, state_machine_._lookup[state_],
+             set_mapping_, state_machine_._dfa_alphabet[state_],
+             state_machine_._seen_BOL_assertion,
+             state_machine_._seen_EOL_assertion);
+
+         build_dfa (root_, set_mapping_,
+             state_machine_._dfa_alphabet[state_],
+             *state_machine_._dfa[state_]);
+     }
+ }
+
+ // Runs minimise_dfa() over every DFA in state_machine_ until a pass no
+ // longer changes its size. DFAs whose alphabet size is 0 are skipped.
+ static void minimise (basic_state_machine<CharT> &state_machine_)
+ {
+     const std::size_t count_ = state_machine_._dfa->size ();
+
+     for (std::size_t state_ = 0; state_ != count_; ++state_)
+     {
+         const std::size_t alphabet_ = state_machine_._dfa_alphabet[state_];
+
+         if (alphabet_ == 0)
+         {
+             continue;
+         }
+
+         size_t_vector &dfa_ = *state_machine_._dfa[state_];
+         std::size_t before_ = dfa_.size ();
+
+         for (;;)
+         {
+             minimise_dfa (alphabet_, dfa_, before_);
+
+             if (dfa_.size () == before_)
+             {
+                 break;
+             }
+
+             before_ = dfa_.size ();
+         }
+     }
+ }
+
+protected:
+ typedef detail::basic_charset<CharT> charset;
+ typedef detail::ptr_list<charset> charset_list;
+ typedef std::auto_ptr<charset> charset_ptr;
+ typedef detail::equivset equivset;
+ typedef detail::ptr_list<equivset> equivset_list;
+ typedef std::auto_ptr<equivset> equivset_ptr;
+ typedef typename charset::index_set index_set;
+ typedef std::vector<index_set> index_set_vector;
+ typedef detail::basic_parser<CharT> parser;
+ typedef typename parser::node_ptr_vector node_ptr_vector;
+ typedef std::set<const detail::node *> node_set;
+ typedef detail::ptr_vector<node_set> node_set_vector;
+ typedef std::vector<const detail::node *> node_vector;
+ typedef detail::ptr_vector<node_vector> node_vector_vector;
+ typedef typename parser::string string;
+ typedef std::pair<string, string> string_pair;
+ typedef typename parser::tokeniser::string_token string_token;
+ typedef std::deque<string_pair> macro_deque;
+ typedef std::pair<string, const detail::node *> macro_pair;
+ typedef typename parser::macro_map::iterator macro_iter;
+ typedef std::pair<macro_iter, bool> macro_iter_pair;
+ typedef typename parser::tokeniser::token_map token_map;
+
+ // Builds the syntax tree for lexer state state_.
+ //
+ // Every regex of the state is parsed (macros first), the resulting trees
+ // are joined with selection nodes, the character sets met while parsing
+ // are partitioned, and lookup_ / set_mapping_ / dfa_alphabet_ are filled
+ // in from that partition. Returns the root of the joined tree.
+ //
+ // The caller must make sure the state has at least one regex.
+ static detail::node *build_tree (const rules &rules_,
+     const std::size_t state_, node_ptr_vector &node_ptr_vector_,
+     size_t_vector *lookup_, index_set_vector &set_mapping_,
+     std::size_t &dfa_alphabet_, bool &seen_BOL_assertion_,
+     bool &seen_EOL_assertion_)
+ {
+     typedef typename detail::node::node_vector tree_vector;
+
+     const typename rules::string_deque_deque &regexes_ =
+         rules_.regexes ();
+     const typename rules::id_vector_deque &ids_ = rules_.ids ();
+     const typename rules::id_vector_deque &states_ = rules_.states ();
+     typename rules::string_deque::const_iterator regex_iter_ =
+         regexes_[state_].begin ();
+     const typename rules::string_deque::const_iterator regex_end_ =
+         regexes_[state_].end ();
+     typename rules::id_vector::const_iterator id_iter_ =
+         ids_[state_].begin ();
+     typename rules::id_vector::const_iterator next_state_iter_ =
+         states_[state_].begin ();
+     // map of regex charset tokens (strings) to index
+     token_map token_map_;
+     typename parser::macro_map macromap_;
+     tree_vector trees_;
+     detail::node *root_ = 0;
+
+     build_macros (token_map_, rules_.macrodeque (), macromap_,
+         rules_.case_sensitive (), rules_.locale (), node_ptr_vector_,
+         rules_.dot_not_newline (), seen_BOL_assertion_,
+         seen_EOL_assertion_);
+
+     // build syntax trees (the first regex is parsed unconditionally)
+     do
+     {
+         const typename rules::string &regex_ = *regex_iter_;
+
+         root_ = parser::parse (regex_.c_str (),
+             regex_.c_str () + regex_.size (), *id_iter_,
+             *next_state_iter_, rules_.case_sensitive (),
+             rules_.dot_not_newline (), rules_.locale (), node_ptr_vector_,
+             macromap_, token_map_, seen_BOL_assertion_,
+             seen_EOL_assertion_);
+         trees_.push_back (root_);
+         ++regex_iter_;
+         ++id_iter_;
+         ++next_state_iter_;
+     } while (regex_iter_ != regex_end_);
+
+     if (seen_BOL_assertion_)
+     {
+         // Fixup BOLs
+         for (typename tree_vector::iterator tree_ = trees_.begin (),
+             trees_end_ = trees_.end (); tree_ != trees_end_; ++tree_)
+         {
+             fixup_bol (*tree_, node_ptr_vector_);
+         }
+     }
+
+     // join trees: fold them left to right into a chain of selection nodes
+     {
+         typename tree_vector::iterator tree_ = trees_.begin ();
+         const typename tree_vector::iterator trees_end_ = trees_.end ();
+
+         root_ = *tree_;
+
+         for (++tree_; tree_ != trees_end_; ++tree_)
+         {
+             node_ptr_vector_->push_back (0);
+             node_ptr_vector_->back () = new detail::selection_node
+                 (root_, *tree_);
+             root_ = node_ptr_vector_->back ();
+         }
+     }
+
+     // partitioned token list
+     charset_list token_list_;
+
+     set_mapping_.resize (token_map_.size ());
+     partition_tokens (token_map_, token_list_);
+
+     std::size_t partition_ = 0;
+
+     for (typename charset_list::list::const_iterator cs_iter_ =
+         token_list_->begin (), cs_end_ = token_list_->end ();
+         cs_iter_ != cs_end_; ++cs_iter_, ++partition_)
+     {
+         const charset *cs_ = *cs_iter_;
+
+         fill_lookup (cs_->_token, lookup_, partition_);
+
+         // Record, for every original token index, which partitions it
+         // was split into.
+         for (typename charset::index_set::const_iterator set_iter_ =
+             cs_->_index_set.begin (), set_end_ = cs_->_index_set.end ();
+             set_iter_ != set_end_; ++set_iter_)
+         {
+             set_mapping_[*set_iter_].insert (partition_);
+         }
+     }
+
+     dfa_alphabet_ = token_list_->size () + dfa_offset;
+     return root_;
+ }
+
+ // Parses every macro in macrodeque_, in order, and stores the resulting
+ // tree in macromap_ under the macro's name. A macro is parsed before it
+ // is inserted, so while parsing only the earlier macros are in macromap_.
+ static void build_macros (token_map &token_map_,
+     const macro_deque &macrodeque_,
+     typename parser::macro_map &macromap_, const bool case_sensitive_,
+     const std::locale &locale_, node_ptr_vector &node_ptr_vector_,
+     const bool not_dot_newline_, bool &seen_BOL_assertion_,
+     bool &seen_EOL_assertion_)
+ {
+     typename macro_deque::const_iterator macro_ = macrodeque_.begin ();
+     const typename macro_deque::const_iterator last_ = macrodeque_.end ();
+
+     while (macro_ != last_)
+     {
+         const typename rules::string &regex_ = macro_->second;
+         detail::node *tree_ = parser::parse (regex_.c_str (),
+             regex_.c_str () + regex_.size (), 0, 0, case_sensitive_,
+             not_dot_newline_, locale_, node_ptr_vector_, macromap_,
+             token_map_, seen_BOL_assertion_, seen_EOL_assertion_);
+
+         // insert() leaves an existing entry in place; either way the
+         // entry for this name ends up pointing at the new tree.
+         macromap_.insert (macro_pair (macro_->first, 0)).first->second =
+             tree_;
+         ++macro_;
+     }
+ }
+
+ // Fills dfa_ with the transition table for the syntax tree root_.
+ //
+ // Row 0 is the 'jam' state; each further row belongs to one node set
+ // discovered by closure(). Rows are dfa_alphabet_ entries wide.
+ static void build_dfa (detail::node *root_,
+     const index_set_vector &set_mapping_, const std::size_t dfa_alphabet_,
+     size_t_vector &dfa_)
+ {
+     node_set_vector seen_sets_;
+     node_vector_vector seen_vectors_;
+     size_t_vector hash_vector_;
+
+     // 'jam' state
+     dfa_.resize (dfa_alphabet_, 0);
+     closure (&root_->firstpos (), seen_sets_, seen_vectors_,
+         hash_vector_, dfa_alphabet_, dfa_);
+
+     // seen_vectors_ grows while it is being walked: closure() appends
+     // every newly discovered node set.
+     for (std::size_t state_ = 0; state_ < seen_vectors_->size (); ++state_)
+     {
+         equivset_list equiv_list_;
+
+         build_equiv_list (seen_vectors_[state_], set_mapping_, equiv_list_);
+
+         typename equivset_list::list::const_iterator equiv_ =
+             equiv_list_->begin ();
+         const typename equivset_list::list::const_iterator equiv_last_ =
+             equiv_list_->end ();
+
+         for (; equiv_ != equiv_last_; ++equiv_)
+         {
+             equivset *equivset_ = *equiv_;
+             const std::size_t transition_ = closure (&equivset_->_followpos,
+                 seen_sets_, seen_vectors_, hash_vector_, dfa_alphabet_, dfa_);
+
+             if (transition_ == npos)
+             {
+                 continue;
+             }
+
+             // Taken after closure(), which may have resized dfa_.
+             std::size_t *row_ = &dfa_.front () +
+                 ((state_ + 1) * dfa_alphabet_);
+
+             // Prune abstemious transitions from end states.
+             if (*row_ && !equivset_->_greedy)
+             {
+                 continue;
+             }
+
+             typename detail::equivset::index_vector::const_iterator
+                 token_iter_ = equivset_->_index_vector.begin ();
+             const typename detail::equivset::index_vector::const_iterator
+                 token_end_ = equivset_->_index_vector.end ();
+
+             for (; token_iter_ != token_end_; ++token_iter_)
+             {
+                 const std::size_t token_ = *token_iter_;
+
+                 if (token_ == bol_token)
+                 {
+                     // A row gets a BOL or an EOL transition, never both.
+                     if (row_[eol_index] == 0)
+                     {
+                         row_[bol_index] = transition_;
+                     }
+                 }
+                 else if (token_ == eol_token)
+                 {
+                     if (row_[bol_index] == 0)
+                     {
+                         row_[eol_index] = transition_;
+                     }
+                 }
+                 else
+                 {
+                     row_[token_ + dfa_offset] = transition_;
+                 }
+             }
+         }
+     }
+ }
+
+ // Maps the node set held in followpos_ to a DFA state index.
+ //
+ // Returns npos for an empty followpos_. Otherwise returns the 1-based
+ // index (0 is the jam state) of the matching entry in seen_sets_; if the
+ // set has not been seen before it is appended to seen_sets_ /
+ // seen_vectors_ / hash_vector_ and a new row of size_ entries is added
+ // to dfa_.
+ static std::size_t closure (typename detail::node::node_vector *followpos_,
+     node_set_vector &seen_sets_, node_vector_vector &seen_vectors_,
+     size_t_vector &hash_vector_, const std::size_t size_, size_t_vector &dfa_)
+ {
+     if (followpos_->empty ())
+     {
+         return npos;
+     }
+
+     bool end_state_ = false;
+     std::size_t id_ = 0;
+     std::size_t state_ = 0;
+     std::size_t hash_ = 0;
+     std::auto_ptr<node_set> set_ptr_ (new node_set);
+     std::auto_ptr<node_vector> vector_ptr_ (new node_vector);
+
+     for (typename detail::node::node_vector::const_iterator node_ =
+         followpos_->begin (), last_ = followpos_->end ();
+         node_ != last_; ++node_)
+     {
+         closure_ex (*node_, end_state_, id_, state_, set_ptr_.get (),
+             vector_ptr_.get (), hash_);
+     }
+
+     // Look for an identical set; the hash is compared first as a cheap
+     // filter before the full set comparison.
+     const std::size_t known_ = hash_vector_.size ();
+
+     for (std::size_t i_ = 0; i_ < known_; ++i_)
+     {
+         if (hash_vector_[i_] == hash_ && *seen_sets_[i_] == *set_ptr_)
+         {
+             return i_ + 1;
+         }
+     }
+
+     // New set: hand ownership over to the containers.
+     seen_sets_->push_back (0);
+     seen_sets_->back () = set_ptr_.release ();
+     seen_vectors_->push_back (0);
+     seen_vectors_->back () = vector_ptr_.release ();
+     hash_vector_.push_back (hash_);
+
+     const std::size_t old_size_ = dfa_.size ();
+
+     dfa_.resize (old_size_ + size_, 0);
+
+     if (end_state_)
+     {
+         dfa_[old_size_] |= end_state;
+         dfa_[old_size_ + id_index] = id_;
+         dfa_[old_size_ + state_index] = state_;
+     }
+
+     // State 0 is the jam state...
+     return seen_sets_->size ();
+ }
+
+ // Adds node_ to the set being assembled by closure().
+ //
+ // The first end-state node seen supplies id_ and state_. A node that was
+ // not yet in *set_ptr_ is also appended to *vector_ptr_ and its address
+ // is added to hash_.
+ static void closure_ex (detail::node *node_, bool &end_state_,
+     std::size_t &id_, std::size_t &state_, node_set *set_ptr_,
+     node_vector *vector_ptr_, std::size_t &hash_)
+ {
+     if (node_->end_state () && !end_state_)
+     {
+         end_state_ = true;
+         id_ = node_->id ();
+         state_ = node_->lexer_state ();
+     }
+
+     if (!set_ptr_->insert (node_).second)
+     {
+         return;
+     }
+
+     vector_ptr_->push_back (node_);
+     hash_ += reinterpret_cast<std::size_t> (node_);
+ }
+
+ // Splits the character sets of map_ into the list lhs_, built up one
+ // right-hand set at a time by intersecting it with every set already in
+ // lhs_.
+ // NOTE(review): the intent appears to be that the resulting sets are
+ // pairwise disjoint - confirm against charset::intersect().
+ static void partition_tokens (const token_map &map_,
+ charset_list &lhs_)
+ {
+ charset_list rhs_;
+
+ fill_rhs_list (map_, rhs_);
+
+ if (!rhs_->empty ())
+ {
+ typename charset_list::list::iterator iter_;
+ typename charset_list::list::iterator end_;
+ // Scratch set that receives the result of each intersect() call.
+ charset_ptr overlap_ (new charset);
+
+ // Seed lhs_ with the first set. The slot is pushed as a null pointer
+ // first and then filled in.
+ lhs_->push_back (0);
+ lhs_->back () = rhs_->front ();
+ rhs_->pop_front ();
+
+ while (!rhs_->empty ())
+ {
+ // r_ takes ownership of the next right-hand set.
+ charset_ptr r_ (rhs_->front ());
+
+ rhs_->pop_front ();
+ iter_ = lhs_->begin ();
+ end_ = lhs_->end ();
+
+ while (!r_->empty () && iter_ != end_)
+ {
+ typename charset_list::list::iterator l_iter_ = iter_;
+
+ // presumably moves what *l_iter_ and r_ have in common into
+ // overlap_, removing it from both - verify in charset
+ (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
+
+ if (overlap_->empty ())
+ {
+ // Nothing in common: try the next left-hand set.
+ ++iter_;
+ }
+ else if ((*l_iter_)->empty ())
+ {
+ // The left-hand set is now empty: replace it with the overlap.
+ delete *l_iter_;
+ *l_iter_ = overlap_.release ();
+
+ // VC++ 6 Hack:
+ charset_ptr temp_overlap_ (new charset);
+
+ overlap_ = temp_overlap_;
+ ++iter_;
+ }
+ else if (r_->empty ())
+ {
+ // The right-hand set is now empty: the overlap takes its place
+ // and is appended to lhs_ by the check after this loop.
+ delete r_.release ();
+ r_ = overlap_;
+
+ // VC++ 6 Hack:
+ charset_ptr temp_overlap_ (new charset);
+
+ overlap_ = temp_overlap_;
+ break;
+ }
+ else
+ {
+ // Both sides keep something: insert the overlap as a new entry
+ // directly after the current left-hand set.
+ iter_ = lhs_->insert (++iter_, 0);
+ *iter_ = overlap_.release ();
+
+ // VC++ 6 Hack:
+ charset_ptr temp_overlap_ (new charset);
+
+ overlap_ = temp_overlap_;
+ ++iter_;
+ end_ = lhs_->end ();
+ }
+ }
+
+ // Whatever is left of r_ becomes a new entry of its own; otherwise
+ // r_ is destroyed when it goes out of scope.
+ if (!r_->empty ())
+ {
+ lhs_->push_back (0);
+ lhs_->back () = r_.release ();
+ }
+ }
+ }
+ }
+
+ // Appends to list_ one newly allocated charset per entry of map_, built
+ // from the entry's key and value.
+ static void fill_rhs_list (const token_map &map_,
+     charset_list &list_)
+ {
+     for (typename token_map::const_iterator entry_ = map_.begin (),
+         last_ = map_.end (); entry_ != last_; ++entry_)
+     {
+         // Push a null slot first, then fill it in.
+         list_->push_back (0);
+         list_->back () = new charset (entry_->first, entry_->second);
+     }
+ }
+
+ // Stores index_ + dfa_offset in *lookup_ for every character that
+ // token_ matches: the characters listed in token_._charset, or, for a
+ // negated token, every character that is not listed.
+ //
+ // The negated walk relies on token_._charset being in ascending CharT
+ // order - NOTE(review): confirm against string_token.
+ static void fill_lookup (const string_token &token_,
+     size_t_vector *lookup_, const std::size_t index_)
+ {
+     const CharT *curr_ = token_._charset.c_str ();
+     const CharT *chars_end_ = curr_ + token_._charset.size ();
+     std::size_t *ptr_ = &lookup_->front ();
+     const std::size_t max_ = sizeof (CharT) == 1 ?
+         num_chars : num_wchar_ts;
+
+     if (token_._negated)
+     {
+         // Start at the smallest value of a single byte CharT. Plain char
+         // is unsigned on some platforms, where a hard-coded -128 would
+         // wrap round to 128 and start the walk in the middle of the range.
+         const bool signed_narrow_ = sizeof (CharT) == 1 &&
+             static_cast<CharT> (-1) < static_cast<CharT> (0);
+         CharT curr_char_ = static_cast<CharT> (signed_narrow_ ? -128 : 0);
+         // Number of character values visited so far.
+         std::size_t i_ = 0;
+
+         while (curr_ < chars_end_)
+         {
+             // Mark every value below the next listed character...
+             while (*curr_ > curr_char_)
+             {
+                 ptr_[static_cast<typename Traits::index_type>
+                     (curr_char_)] = index_ + dfa_offset;
+                 ++curr_char_;
+                 ++i_;
+             }
+
+             // ...and step over the listed character itself.
+             ++curr_char_;
+             ++curr_;
+             ++i_;
+         }
+
+         // Mark everything above the last listed character.
+         for (; i_ < max_; ++i_)
+         {
+             ptr_[static_cast<typename Traits::index_type>(curr_char_)] =
+                 index_ + dfa_offset;
+             ++curr_char_;
+         }
+     }
+     else
+     {
+         while (curr_ < chars_end_)
+         {
+             ptr_[static_cast<typename Traits::index_type>(*curr_)] =
+                 index_ + dfa_offset;
+             ++curr_;
+         }
+     }
+ }
+
+ // Builds lhs_ from the equivsets derived from the nodes in vector_: each
+ // right-hand set in turn is intersected with every set already in lhs_.
+ // NOTE(review): the intent appears to be that the resulting sets are
+ // pairwise disjoint - confirm against equivset::intersect().
+ static void build_equiv_list (const node_vector *vector_,
+ const index_set_vector &set_mapping_, equivset_list &lhs_)
+ {
+ equivset_list rhs_;
+
+ fill_rhs_list (vector_, set_mapping_, rhs_);
+
+ if (!rhs_->empty ())
+ {
+ typename equivset_list::list::iterator iter_;
+ typename equivset_list::list::iterator end_;
+ // Scratch set that receives the result of each intersect() call.
+ equivset_ptr overlap_ (new equivset);
+
+ // Seed lhs_ with the first set. The slot is pushed as a null pointer
+ // first and then filled in.
+ lhs_->push_back (0);
+ lhs_->back () = rhs_->front ();
+ rhs_->pop_front ();
+
+ while (!rhs_->empty ())
+ {
+ // r_ takes ownership of the next right-hand set.
+ equivset_ptr r_ (rhs_->front ());
+
+ rhs_->pop_front ();
+ iter_ = lhs_->begin ();
+ end_ = lhs_->end ();
+
+ while (!r_->empty () && iter_ != end_)
+ {
+ typename equivset_list::list::iterator l_iter_ = iter_;
+
+ // presumably moves what *l_iter_ and r_ have in common into
+ // overlap_, removing it from both - verify in equivset
+ (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
+
+ if (overlap_->empty ())
+ {
+ // Nothing in common: try the next left-hand set.
+ ++iter_;
+ }
+ else if ((*l_iter_)->empty ())
+ {
+ // The left-hand set is now empty: replace it with the overlap.
+ delete *l_iter_;
+ *l_iter_ = overlap_.release ();
+
+ // VC++ 6 Hack:
+ equivset_ptr temp_overlap_ (new equivset);
+
+ overlap_ = temp_overlap_;
+ ++iter_;
+ }
+ else if (r_->empty ())
+ {
+ // The right-hand set is now empty: the overlap takes its place
+ // and is appended to lhs_ by the check after this loop.
+ delete r_.release ();
+ r_ = overlap_;
+
+ // VC++ 6 Hack:
+ equivset_ptr temp_overlap_ (new equivset);
+
+ overlap_ = temp_overlap_;
+ break;
+ }
+ else
+ {
+ // Both sides keep something: insert the overlap as a new entry
+ // directly after the current left-hand set.
+ iter_ = lhs_->insert (++iter_, 0);
+ *iter_ = overlap_.release ();
+
+ // VC++ 6 Hack:
+ equivset_ptr temp_overlap_ (new equivset);
+
+ overlap_ = temp_overlap_;
+ ++iter_;
+ end_ = lhs_->end ();
+ }
+ }
+
+ // Whatever is left of r_ becomes a new entry of its own; otherwise
+ // r_ is destroyed when it goes out of scope.
+ if (!r_->empty ())
+ {
+ lhs_->push_back (0);
+ lhs_->back () = r_.release ();
+ }
+ }
+ }
+ }
+
+ // Appends to list_ one newly allocated equivset for every node in
+ // vector_ that is neither an end state nor a null_token leaf.
+ //
+ // BOL/EOL nodes get a one-element index set holding the token itself;
+ // every other node uses the partition set stored in set_mapping_ for
+ // its token.
+ static void fill_rhs_list (const node_vector *vector_,
+     const index_set_vector &set_mapping_, equivset_list &list_)
+ {
+     for (typename node_vector::const_iterator iter_ = vector_->begin (),
+         last_ = vector_->end (); iter_ != last_; ++iter_)
+     {
+         const detail::node *node_ = *iter_;
+
+         if (node_->end_state ())
+         {
+             continue;
+         }
+
+         const std::size_t token_ = node_->token ();
+
+         if (token_ == null_token)
+         {
+             continue;
+         }
+
+         // Push a null slot first, then fill it in.
+         list_->push_back (0);
+
+         if (token_ == bol_token || token_ == eol_token)
+         {
+             std::set<std::size_t> index_set_;
+
+             index_set_.insert (token_);
+             list_->back () = new equivset (index_set_,
+                 node_->greedy (), node_->token (), node_->followpos ());
+         }
+         else
+         {
+             list_->back () = new equivset (set_mapping_[token_],
+                 node_->greedy (), node_->token (), node_->followpos ());
+         }
+     }
+ }
+
+ // Makes sure the tree root_ can start with a BOL token.
+ //
+ // If none of the nodes in root_'s firstpos is a (non end-state)
+ // bol_token leaf, root_ is replaced by the sequence
+ // (bol_token | null_token) root_. All new nodes are owned by
+ // node_ptr_vector_.
+ static void fixup_bol (detail::node * &root_,
+     node_ptr_vector &node_ptr_vector_)
+ {
+     typename detail::node::node_vector *first_ = &root_->firstpos ();
+
+     for (typename detail::node::node_vector::const_iterator iter_ =
+         first_->begin (), last_ = first_->end (); iter_ != last_; ++iter_)
+     {
+         const detail::node *node_ = *iter_;
+
+         if (!node_->end_state () && node_->token () == bol_token)
+         {
+             // Already starts with a BOL: nothing to do.
+             return;
+         }
+     }
+
+     // Each node is stored by pushing a null slot and then filling it in.
+     node_ptr_vector_->push_back (0);
+     node_ptr_vector_->back () = new detail::leaf_node (bol_token, true);
+
+     detail::node *bol_ = node_ptr_vector_->back ();
+
+     node_ptr_vector_->push_back (0);
+     node_ptr_vector_->back () = new detail::leaf_node (null_token, true);
+
+     detail::node *null_ = node_ptr_vector_->back ();
+
+     node_ptr_vector_->push_back (0);
+     node_ptr_vector_->back () =
+         new detail::selection_node (bol_, null_);
+
+     detail::node *optional_bol_ = node_ptr_vector_->back ();
+
+     node_ptr_vector_->push_back (0);
+     node_ptr_vector_->back () =
+         new detail::sequence_node (optional_bol_, root_);
+     root_ = node_ptr_vector_->back ();
+ }
+
+ // One minimisation pass over dfa_: rows that are byte-for-byte equal to
+ // an earlier row are dropped and every transition is renumbered.
+ //
+ // dfa_alphabet_ is the row width and size_ the number of entries of dfa_
+ // to consider (the caller passes dfa_.size ()). dfa_ is only replaced if
+ // at least one duplicate row was found.
+ static void minimise_dfa (const std::size_t dfa_alphabet_,
+ size_t_vector &dfa_, std::size_t size_)
+ {
+ const std::size_t *first_ = &dfa_.front ();
+ const std::size_t *second_ = 0;
+ const std::size_t *end_ = first_ + size_;
+ std::size_t index_ = 1;
+ std::size_t new_index_ = 1;
+ std::size_t curr_index_ = 0;
+ // Indexes of the rows that duplicate an earlier row.
+ index_set index_set_;
+ // Maps each old row index to its index in the reduced table.
+ size_t_vector lookup_;
+ std::size_t *lookup_ptr_ = 0;
+
+ lookup_.resize (size_ / dfa_alphabet_, null_token);
+ lookup_ptr_ = &lookup_.front ();
+ *lookup_ptr_ = 0;
+ // Only one 'jam' state, so skip it.
+ first_ += dfa_alphabet_;
+
+ for (; first_ < end_; first_ += dfa_alphabet_, ++index_)
+ {
+ // Compare row index_ with every later row not yet marked duplicate.
+ for (second_ = first_ + dfa_alphabet_, curr_index_ = index_ + 1;
+ second_ < end_; second_ += dfa_alphabet_, ++curr_index_)
+ {
+ if (index_set_.find (curr_index_) != index_set_.end ())
+ {
+ continue;
+ }
+
+ // Some systems have memcmp in namespace std.
+ using namespace std;
+
+ if (memcmp (first_, second_, sizeof (std::size_t) *
+ dfa_alphabet_) == 0)
+ {
+ // The duplicate takes the new index of the row it equals.
+ index_set_.insert (curr_index_);
+ lookup_ptr_[curr_index_] = new_index_;
+ }
+ }
+
+ // A row that is not itself a duplicate gets the next free new index.
+ if (lookup_ptr_[index_] == null_token)
+ {
+ lookup_ptr_[index_] = new_index_;
+ ++new_index_;
+ }
+ }
+
+ if (!index_set_.empty ())
+ {
+ const std::size_t *front_ = &dfa_.front ();
+ // Starts out as a copy of the jam row.
+ size_t_vector new_dfa_ (front_, front_ + dfa_alphabet_);
+ typename index_set::iterator set_end_ =
+ index_set_.end ();
+ const std::size_t *ptr_ = front_ + dfa_alphabet_;
+ std::size_t *new_ptr_ = 0;
+
+ new_dfa_.resize (size_ - index_set_.size () * dfa_alphabet_, 0);
+ new_ptr_ = &new_dfa_.front () + dfa_alphabet_;
+ // From here on size_ counts rows rather than entries.
+ size_ /= dfa_alphabet_;
+
+ for (index_ = 1; index_ < size_; ++index_)
+ {
+ if (index_set_.find (index_) != set_end_)
+ {
+ // Duplicate row: skip it in the source table.
+ ptr_ += dfa_alphabet_;
+ continue;
+ }
+
+ // The first three header entries are copied unchanged; the BOL/EOL
+ // entries and all transitions are state indexes and go through
+ // the renumbering table.
+ new_ptr_[end_state_index] = ptr_[end_state_index];
+ new_ptr_[id_index] = ptr_[id_index];
+ new_ptr_[state_index] = ptr_[state_index];
+ new_ptr_[bol_index] = lookup_ptr_[ptr_[bol_index]];
+ new_ptr_[eol_index] = lookup_ptr_[ptr_[eol_index]];
+ new_ptr_ += dfa_offset;
+ ptr_ += dfa_offset;
+
+ for (std::size_t i_ = dfa_offset; i_ < dfa_alphabet_; ++i_)
+ {
+ *new_ptr_++ = lookup_ptr_[*ptr_++];
+ }
+ }
+
+ dfa_.swap (new_dfa_);
+ }
+ }
+};
+
+typedef basic_generator<char> generator;
+typedef basic_generator<wchar_t> wgenerator;
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/input.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/input.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,486 @@
+// input.hpp
+// Copyright (c) 2008 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_INPUT
+#define BOOST_LEXER_INPUT
+
+#include "char_traits.hpp"
+#include <iterator>
+#include "size_t.hpp"
+#include "state_machine.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+template<typename FwdIter, typename Traits =
+ char_traits<typename boost::detail::iterator_traits<FwdIter>::value_type> >
+class basic_input
+{
+public:
+ class iterator
+ {
+ public:
+#if defined _MSC_VER && _MSC_VER <= 1200
+ friend basic_input;
+#else
+ friend class basic_input;
+#endif
+
+ struct data // Current token: the value a dereferenced iterator exposes.
+ {
+ std::size_t id; // token id assigned from next ()
+ FwdIter start; // beginning of the current token
+ FwdIter end; // position where the current token stopped
+ bool bol; // beginning-of-line flag carried between scans
+ std::size_t state; // lexer state index passed to next (); npos once input is exhausted
+
+ // Construct in end() state.
+ data () :
+ id (0), start (), end (), // value-initialise so pointer iterators are never indeterminate when compared
+ bol (false),
+ state (npos)
+ {
+ }
+
+ bool operator == (const data &rhs_) const // member-wise equality over all five fields
+ {
+ return id == rhs_.id && start == rhs_.start &&
+ end == rhs_.end && bol == rhs_.bol && state == rhs_.state;
+ }
+ };
+
+ iterator () : // Default iterator: not attached to any basic_input.
+ _input (0)
+ {
+ }
+
+ bool operator == (const iterator &rhs_) const // Compares token data only; _input is not part of the comparison.
+ {
+ return _data == rhs_._data;
+ }
+
+ bool operator != (const iterator &rhs_) const // Negation of operator ==.
+ {
+ return !(*this == rhs_);
+ }
+
+ data &operator * () // Mutable access to the current token.
+ {
+ return _data;
+ }
+
+ data *operator -> () // Member access to the current token.
+ {
+ return &_data;
+ }
+
+ // Let compiler generate operator = ().
+
+ // prefix version
+ iterator &operator ++ ()
+ {
+ next_token (); // dereferences _input, so _input must not be null here
+ return *this;
+ }
+
+ // postfix version
+ iterator operator ++ (int)
+ {
+ iterator iter_ = *this; // snapshot of the pre-increment token, returned by value
+
+ next_token ();
+ return iter_;
+ }
+
+ private:
+ // Not owner (obviously!)
+ const basic_input *_input;
+ data _data;
+
+ void next_token () // Scan one token, starting where the previous token ended, and store its id in _data.
+ {
+ _data.start = _data.end; // the new token begins at the old token's end
+
+ if (_input->_state_machine->_dfa->size () == 1) // a single DFA: use the overloads that take the raw tables
+ {
+ if (_input->_state_machine->_seen_BOL_assertion ||
+ _input->_state_machine->_seen_EOL_assertion) // assertions present: use the overload that tracks _data.bol
+ {
+ _data.id = next
+ (&_input->_state_machine->_lookup->front ()->front (),
+ _input->_state_machine->_dfa_alphabet.front (),
+ &_input->_state_machine->_dfa->front ()->front (),
+ _input->_begin, _data.end, _input->_end);
+ }
+ else
+ {
+ _data.id = next (&_input->_state_machine->_lookup->
+ front ()->front (), _input->_state_machine->
+ _dfa_alphabet.front (), &_input->_state_machine->
+ _dfa->front ()->front (), _data.end, _input->_end);
+ }
+ }
+ else // several DFAs: the overloads below select tables by _data.state and may update it
+ {
+ if (_input->_state_machine->_seen_BOL_assertion ||
+ _input->_state_machine->_seen_EOL_assertion)
+ {
+ _data.id = next (*_input->_state_machine, _data.state,
+ _input->_begin, _data.end, _input->_end);
+ }
+ else
+ {
+ _data.id = next (*_input->_state_machine, _data.state,
+ _data.end, _input->_end);
+ }
+ }
+
+ if (_data.end == _input->_end && _data.start == _data.end) // nothing was consumed and we are at the end of input
+ {
+ // Ensure current state matches that returned by end().
+ _data.state = npos;
+ }
+ }
+
+ std::size_t next (const basic_state_machine // Multi-state scan that honours BOL/EOL transitions.
+ <typename Traits::char_type> &state_machine_,
+ std::size_t &start_state_, const FwdIter &start_, // start_state_: in/out lexer state; start_ is not read in this body
+ FwdIter &start_token_, const FwdIter &end_) // start_token_: in/out scan position. Returns the token id, 0 at end of input, npos for an unmatched char.
+ {
+ again: // re-entered after an id 0 match, so the end-of-input test has to follow the label
+ if (start_token_ == end_) return 0; // otherwise an id 0 match that consumed the last char would dereference end_ below
+
+ bool bol_ = _data.bol;
+ const std::size_t * lookup_ = &state_machine_._lookup[start_state_]->
+ front ();
+ std::size_t dfa_alphabet_ = state_machine_._dfa_alphabet[start_state_]; // row width of this state's DFA table
+ const std::size_t *dfa_ = &state_machine_._dfa[start_state_]->front ();
+ const std::size_t *ptr_ = dfa_ + dfa_alphabet_; // scanning starts at row 1; a transition value of 0 means "no transition"
+ FwdIter curr_ = start_token_;
+ bool end_state_ = *ptr_ != 0; // first cell of a row flags an accepting state
+ std::size_t id_ = *(ptr_ + id_index);
+ bool end_bol_ = bol_;
+ FwdIter end_token_ = start_token_;
+
+ while (curr_ != end_)
+ {
+ const std::size_t BOL_state_ = ptr_[bol_index];
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ if (BOL_state_ && bol_) // take the beginning-of-line transition without consuming input
+ {
+ ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
+ }
+ else if (EOL_state_ && *curr_ == '\n') // take the end-of-line transition; the '\n' itself is not consumed
+ {
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+ }
+ else
+ {
+ typename Traits::char_type prev_char_ = *curr_++;
+
+ bol_ = prev_char_ == '\n';
+
+ const std::size_t state_ =
+ ptr_[lookup_[static_cast<typename Traits::index_type>
+ (prev_char_)]];
+
+ if (state_ == 0) // no transition on this character: stop scanning
+ {
+ break;
+ }
+
+ ptr_ = &dfa_[state_ * dfa_alphabet_];
+ }
+
+ if (*ptr_) // accepting row: remember it as the longest match so far
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ start_state_ = *(ptr_ + state_index);
+ end_bol_ = bol_;
+ end_token_ = curr_;
+ }
+ }
+
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ if (EOL_state_ && curr_ == end_) // end of input is treated as an end of line. NOTE(review): also true when the loop broke on the last char - confirm intended
+ {
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+
+ if (*ptr_)
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ start_state_ = *(ptr_ + state_index);
+ end_bol_ = bol_;
+ end_token_ = curr_;
+ }
+ }
+
+ if (end_state_)
+ {
+ // return longest match
+ _data.bol = end_bol_;
+ start_token_ = end_token_;
+
+ if (id_ == 0) goto again; // id 0 matches are not returned; scan again from the new position
+ }
+ else
+ {
+ // No match causes char to be skipped
+ _data.bol = *start_token_ == '\n';
+ ++start_token_;
+ id_ = npos;
+ }
+
+ return id_;
+ }
+
+ std::size_t next (const basic_state_machine // Multi-state scan without BOL/EOL handling.
+ <typename Traits::char_type> &state_machine_,
+ std::size_t &start_state_, FwdIter &start_token_, // both in/out: lexer state and scan position
+ FwdIter const &end_) // Returns the token id, 0 at end of input, npos for an unmatched char.
+ {
+ again: // re-entered after an id 0 match, so the end-of-input test has to follow the label
+ if (start_token_ == end_) return 0; // otherwise an id 0 match that consumed the last char would step past end_ below
+
+ const std::size_t * lookup_ = &state_machine_._lookup[start_state_]->
+ front ();
+ std::size_t dfa_alphabet_ = state_machine_._dfa_alphabet[start_state_]; // row width of this state's DFA table
+ const std::size_t *dfa_ = &state_machine_._dfa[start_state_]->front ();
+ const std::size_t *ptr_ = dfa_ + dfa_alphabet_; // scanning starts at row 1; a transition value of 0 means "no transition"
+ FwdIter curr_ = start_token_;
+ bool end_state_ = *ptr_ != 0; // first cell of a row flags an accepting state
+ std::size_t id_ = *(ptr_ + id_index);
+ FwdIter end_token_ = start_token_;
+
+ while (curr_ != end_)
+ {
+ const std::size_t state_ = ptr_[lookup_[static_cast
+ <typename Traits::index_type>(*curr_++)]];
+
+ if (state_ == 0) // no transition on this character: stop scanning
+ {
+ break;
+ }
+
+ ptr_ = &dfa_[state_ * dfa_alphabet_];
+
+ if (*ptr_) // accepting row: remember it as the longest match so far
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ start_state_ = *(ptr_ + state_index);
+ end_token_ = curr_;
+ }
+ }
+
+ if (end_state_)
+ {
+ // return longest match
+ start_token_ = end_token_;
+
+ if (id_ == 0) goto again; // id 0 matches are not returned; scan again from the new position
+ }
+ else
+ {
+ // No match causes char to be skipped
+ ++start_token_;
+ id_ = npos;
+ }
+
+ return id_;
+ }
+
+ std::size_t next (const std::size_t * const lookup_, // Single-DFA scan that honours BOL/EOL transitions.
+ const std::size_t dfa_alphabet_, const std::size_t * const dfa_, // dfa_alphabet_ is the row width of dfa_
+ FwdIter const &start_, FwdIter &start_token_, FwdIter const &end_) // start_ is not read in this body; start_token_ is advanced past the token
+ {
+ if (start_token_ == end_) return 0; // id 0 signals end of input
+
+ bool bol_ = _data.bol;
+ const std::size_t *ptr_ = dfa_ + dfa_alphabet_; // scanning starts at row 1; a transition value of 0 means "no transition"
+ FwdIter curr_ = start_token_;
+ bool end_state_ = *ptr_ != 0; // first cell of a row flags an accepting state
+ std::size_t id_ = *(ptr_ + id_index);
+ bool end_bol_ = bol_;
+ FwdIter end_token_ = start_token_;
+
+ while (curr_ != end_)
+ {
+ const std::size_t BOL_state_ = ptr_[bol_index];
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ if (BOL_state_ && bol_) // take the beginning-of-line transition without consuming input
+ {
+ ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
+ }
+ else if (EOL_state_ && *curr_ == '\n') // take the end-of-line transition; the '\n' itself is not consumed
+ {
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+ }
+ else
+ {
+ typename Traits::char_type prev_char_ = *curr_++;
+
+ bol_ = prev_char_ == '\n';
+
+ const std::size_t state_ =
+ ptr_[lookup_[static_cast<typename Traits::index_type>
+ (prev_char_)]];
+
+ if (state_ == 0) // no transition on this character: stop scanning
+ {
+ break;
+ }
+
+ ptr_ = &dfa_[state_ * dfa_alphabet_];
+ }
+
+ if (*ptr_) // accepting row: remember it as the longest match so far
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ end_bol_ = bol_;
+ end_token_ = curr_;
+ }
+ }
+
+ const std::size_t EOL_state_ = ptr_[eol_index];
+
+ if (EOL_state_ && curr_ == end_) // end of input is treated as an end of line. NOTE(review): also true when the loop broke on the last char - confirm intended
+ {
+ ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
+
+ if (*ptr_)
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ end_bol_ = bol_;
+ end_token_ = curr_;
+ }
+ }
+
+ if (end_state_)
+ {
+ // return longest match
+ start_token_ = end_token_;
+ _data.bol = end_bol_;
+ }
+ else
+ {
+ // No match causes char to be skipped
+ _data.bol = *start_token_ == '\n';
+ ++start_token_;
+ id_ = npos; // npos is returned for the unmatched character
+ }
+
+ return id_;
+ }
+
+ std::size_t next (const std::size_t * const lookup_, // Single-DFA scan without BOL/EOL handling.
+ const std::size_t dfa_alphabet_, const std::size_t * const dfa_, // dfa_alphabet_ is the row width of dfa_
+ FwdIter &start_token_, FwdIter const &end_) // start_token_ is advanced past the token (or past one unmatched char)
+ {
+ if (start_token_ == end_) return 0; // id 0 signals end of input
+
+ const std::size_t *ptr_ = dfa_ + dfa_alphabet_; // scanning starts at row 1; a transition value of 0 means "no transition"
+ FwdIter curr_ = start_token_;
+ bool end_state_ = *ptr_ != 0; // first cell of a row flags an accepting state
+ std::size_t id_ = *(ptr_ + id_index);
+ FwdIter end_token_ = start_token_;
+
+ while (curr_ != end_)
+ {
+ const std::size_t state_ = ptr_[lookup_[static_cast
+ <typename Traits::index_type>(*curr_++)]];
+
+ if (state_ == 0) // no transition on this character: stop scanning
+ {
+ break;
+ }
+
+ ptr_ = &dfa_[state_ * dfa_alphabet_];
+
+ if (*ptr_) // accepting row: remember it as the longest match so far
+ {
+ end_state_ = true;
+ id_ = *(ptr_ + id_index);
+ end_token_ = curr_;
+ }
+ }
+
+ if (end_state_)
+ {
+ // return longest match
+ start_token_ = end_token_;
+ }
+ else
+ {
+ // No match causes char to be skipped
+ ++start_token_;
+ id_ = npos; // npos is returned for the unmatched character
+ }
+
+ return id_;
+ }
+ };
+
+#if defined _MSC_VER && _MSC_VER <= 1200
+ friend iterator;
+#else
+ friend class iterator;
+#endif
+
+ // Make it explicit that we are NOT taking a copy of state_machine_!
+ basic_input (const basic_state_machine<typename Traits::char_type>
+ *state_machine_, const FwdIter &begin_, const FwdIter &end_) : // only the pointer is stored; begin_ and end_ are copied
+ _state_machine (state_machine_),
+ _begin (begin_),
+ _end (end_)
+ {
+ }
+
+ iterator begin () const // Returns an iterator already positioned on the first token.
+ {
+ iterator iter_;
+
+ iter_._input = this;
+ iter_._data.id = npos;
+ iter_._data.start = _begin;
+ iter_._data.end = _begin;
+ iter_._data.bol = _state_machine->_seen_BOL_assertion; // start of input counts as beginning of line only when BOL assertions were seen
+ iter_._data.state = 0; // scanning starts in lexer state 0
+ ++iter_; // scan the first token
+ return iter_;
+ }
+
+ iterator end () const // Returns the past-the-end iterator: default token data with start and end both at _end.
+ {
+ iterator iter_; // data () already sets id 0, bol false and state npos
+
+ iter_._input = this;
+ iter_._data.start = _end;
+ iter_._data.end = _end;
+ return iter_;
+ }
+
+private:
+ const basic_state_machine<typename Traits::char_type> *_state_machine;
+ FwdIter _begin;
+ FwdIter _end;
+};
+
+typedef basic_input<std::string::iterator> iter_input;
+typedef basic_input<std::wstring::iterator> iter_winput;
+typedef basic_input<const char *> ptr_input;
+typedef basic_input<const wchar_t *> ptr_winput;
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,522 @@
+// parser.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_PARSER_HPP
+#define BOOST_LEXER_PARSER_HPP
+
+#include <assert.h>
+#include "tree/end_node.hpp"
+#include "tree/iteration_node.hpp"
+#include "tree/leaf_node.hpp"
+#include "../runtime_error.hpp"
+#include "tree/selection_node.hpp"
+#include "tree/sequence_node.hpp"
+#include "../size_t.hpp"
+#include "tokeniser/re_tokeniser.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+template<typename CharT>
+class basic_parser
+{
+public:
+ typedef basic_re_tokeniser<CharT> tokeniser;
+ typedef typename tokeniser::string string;
+ typedef std::map<string, const node *> macro_map;
+ typedef node::node_ptr_vector node_ptr_vector;
+ typedef typename tokeniser::num_token token;
+
+/*
+ General principles of regex parsing:
+ - Every regex is a sequence of sub-regexes.
+ - Regexes consist of operands and operators
+ - All operators decompose to sequence, selection ('|') and iteration ('*')
+ - Regex tokens are stored on the stack.
+ - When a complete sequence of regex tokens is on the stack it is processed.
+
+Grammar:
+
+<REGEX> -> <OREXP>
+<OREXP> -> <SEQUENCE> | <OREXP>'|'<SEQUENCE>
+<SEQUENCE> -> <SUB>
+<SUB> -> <EXPRESSION> | <SUB><EXPRESSION>
+<EXPRESSION> -> <REPEAT>
+<REPEAT> -> charset | macro | '('<REGEX>')' | <REPEAT><DUPLICATE>
+<DUPLICATE> -> '?' | '*' | '+' | '{n[,[m]]}'
+*/
+ static node *parse (const CharT *start_, const CharT * const end_, // Parses the regex in [start_, end_) into a syntax tree and returns its root.
+ const std::size_t id_, const std::size_t dfa_state_, // id_ == 0 means no end node is appended (see below); otherwise both go into the end node
+ const bool case_sensitive_, const bool dot_not_newline_, // forwarded to the tokeniser state
+ const std::locale &locale_, node_ptr_vector &node_ptr_vector_, // every node created here is stored in node_ptr_vector_
+ const macro_map &macromap_, typename tokeniser::token_map &map_, // macromap_: name -> tree used to expand macros; map_ is passed to the tokeniser
+ bool &seen_BOL_assertion_, bool &seen_EOL_assertion_) // set to true (never cleared) when the tokeniser state reports the assertion
+ {
+ node *root_ = 0;
+ state state_ (start_, end_, case_sensitive_, locale_,
+ dot_not_newline_);
+ token lhs_token_;
+ token rhs_token_;
+ token_stack token_stack_;
+ tree_node_stack tree_node_stack_;
+ char action_ = 0;
+
+ token_stack_.push (rhs_token_); // default-constructed token has type BEGIN: bottom-of-stack marker
+ tokeniser::next (state_, map_, rhs_token_);
+
+ do
+ {
+ lhs_token_ = token_stack_.top ();
+ action_ = lhs_token_.precedence (rhs_token_._type); // precedence table lookup: stack top against lookahead
+
+ switch (action_)
+ {
+ case '<':
+ case '=': // shift: push the lookahead and read the next token
+ token_stack_.push (rhs_token_);
+ tokeniser::next (state_, map_, rhs_token_);
+ break;
+ case '>': // reduce the handle on top of the stack
+ reduce (token_stack_, macromap_, node_ptr_vector_,
+ tree_node_stack_);
+ break;
+ default: // any other table entry is a syntax error
+ std::ostringstream ss_;
+
+ ss_ << "A syntax error occurred: '" <<
+ lhs_token_.precedence_string () <<
+ "' against '" << rhs_token_.precedence_string () <<
+ "' at index " << state_._index << ".";
+ throw runtime_error (ss_.str ().c_str ());
+ break;
+ }
+ } while (!token_stack_.empty ());
+
+ if (tree_node_stack_.empty ())
+ {
+ throw runtime_error ("Empty rules are not allowed.");
+ }
+
+ assert (tree_node_stack_.size () == 1);
+
+ node *lhs_node_ = tree_node_stack_.top ();
+
+ tree_node_stack_.pop ();
+
+ if (id_ == 0)
+ {
+ // Macros have no end state...
+ root_ = lhs_node_;
+ }
+ else
+ {
+ node_ptr_vector_->push_back (0); // slot is added before the node is allocated - presumably so the vector can own it immediately
+
+ node *rhs_node_ = new end_node (id_, dfa_state_);
+
+ node_ptr_vector_->back () = rhs_node_;
+ node_ptr_vector_->push_back (0);
+ node_ptr_vector_->back () = new sequence_node (lhs_node_, rhs_node_); // root is: parsed regex followed by the end node
+ root_ = node_ptr_vector_->back ();
+ }
+
+ // Done this way as bug in VC++ 6 prevents |= operator working
+ // properly!
+ if (state_._seen_BOL_assertion) seen_BOL_assertion_ = true;
+
+ if (state_._seen_EOL_assertion) seen_EOL_assertion_ = true;
+
+ return root_;
+ }
+
+private:
+ typedef typename tokeniser::state state;
+ typedef std::stack<token> token_stack;
+ typedef node::node_stack tree_node_stack;
+
+ static void reduce (token_stack &token_stack_, // Pops one handle off token_stack_ and dispatches on its leftmost token.
+ const macro_map &macromap_, node_ptr_vector &node_vector_ptr_,
+ tree_node_stack &tree_node_stack_)
+ {
+ typename tokeniser::num_token lhs_;
+ typename tokeniser::num_token rhs_;
+ token_stack handle_; // receives the popped tokens, so its top is the leftmost token of the handle
+ char action_ = 0;
+
+ do
+ {
+ rhs_ = token_stack_.top ();
+ token_stack_.pop ();
+ handle_.push (rhs_);
+
+ if (!token_stack_.empty ())
+ {
+ lhs_ = token_stack_.top ();
+ action_ = lhs_.precedence (rhs_._type);
+ }
+ } while (!token_stack_.empty () && action_ == '='); // keep popping while neighbouring tokens belong to the same handle
+
+ assert (token_stack_.empty () || action_ == '<');
+
+ switch (rhs_._type) // rhs_ is the last token popped, i.e. the leftmost of the handle
+ {
+ case token::BEGIN:
+ // finished processing so exit
+ break;
+ case token::REGEX:
+ // finished parsing, nothing to do
+ break;
+ case token::OREXP:
+ orexp (handle_, token_stack_, node_vector_ptr_, tree_node_stack_);
+ break;
+ case token::SEQUENCE:
+ token_stack_.push (token::OREXP);
+ break;
+ case token::SUB:
+ sub (handle_, token_stack_, node_vector_ptr_, tree_node_stack_);
+ break;
+ case token::EXPRESSION:
+ token_stack_.push (token::SUB);
+ break;
+ case token::REPEAT:
+ repeat (handle_, token_stack_);
+ break;
+ case token::CHARSET:
+ charset (handle_, token_stack_, node_vector_ptr_,
+ tree_node_stack_);
+ break;
+ case token::MACRO:
+ macro (handle_, token_stack_, macromap_, node_vector_ptr_,
+ tree_node_stack_);
+ break;
+ case token::OPENPAREN:
+ openparen (handle_, token_stack_);
+ break;
+ case token::OPT:
+ case token::AOPT:
+ optional (rhs_._type == token::OPT, node_vector_ptr_, // first argument is the greedy flag: true for the form without a trailing '?'
+ tree_node_stack_);
+ token_stack_.push (token::DUP);
+ break;
+ case token::ZEROORMORE:
+ case token::AZEROORMORE:
+ zero_or_more (rhs_._type == token::ZEROORMORE, node_vector_ptr_,
+ tree_node_stack_);
+ token_stack_.push (token::DUP);
+ break;
+ case token::ONEORMORE:
+ case token::AONEORMORE:
+ one_or_more (rhs_._type == token::ONEORMORE, node_vector_ptr_,
+ tree_node_stack_);
+ token_stack_.push (token::DUP);
+ break;
+ case token::REPEATN:
+ case token::AREPEATN:
+ repeatn (rhs_._type == token::REPEATN, handle_.top (), // handle_.top () carries the min/comma/max values
+ node_vector_ptr_, tree_node_stack_);
+ token_stack_.push (token::DUP);
+ break;
+ default:
+ throw runtime_error
+ ("Internal error regex_parser::reduce");
+ break;
+ }
+ }
+
+ static void orexp (token_stack &handle_, token_stack &token_stack_, // Reduces a handle whose leftmost token is OREXP.
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
+ {
+ assert (handle_.top ()._type == token::OREXP &&
+ (handle_.size () == 1 || handle_.size () == 3));
+
+ if (handle_.size () == 1) // a lone OREXP becomes REGEX
+ {
+ token_stack_.push (token::REGEX);
+ }
+ else // OREXP '|' SEQUENCE: combine the two topmost trees with a selection node
+ {
+ handle_.pop ();
+ assert (handle_.top ()._type == token::OR);
+ handle_.pop ();
+ assert (handle_.top ()._type == token::SEQUENCE);
+ perform_or (node_ptr_vector_, tree_node_stack_);
+ token_stack_.push (token::OREXP);
+ }
+ }
+
+ static void sub (token_stack &handle_, token_stack &token_stack_, // Reduces a handle whose leftmost token is SUB.
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
+ {
+ assert (handle_.top ()._type == token::SUB &&
+ (handle_.size () == 1 || handle_.size () == 2)); // parenthesised: && binds tighter than ||, so the type check was skipped for size 2
+
+ if (handle_.size () == 1) // a lone SUB becomes SEQUENCE
+ {
+ token_stack_.push (token::SEQUENCE);
+ }
+ else // SUB EXPRESSION: join the two topmost trees with a sequence node
+ {
+ handle_.pop ();
+ assert (handle_.top ()._type == token::EXPRESSION);
+ // perform join
+ sequence (node_ptr_vector_, tree_node_stack_);
+ token_stack_.push (token::SUB);
+ }
+ }
+
+ static void repeat (token_stack &handle_, token_stack &token_stack_) // Reduces a handle whose leftmost token is REPEAT; no tree nodes are touched.
+ {
+ assert (handle_.top ()._type == token::REPEAT &&
+ handle_.size () >= 1 && handle_.size () <= 3);
+
+ if (handle_.size () == 1) // a lone REPEAT becomes EXPRESSION
+ {
+ token_stack_.push (token::EXPRESSION);
+ }
+ else // REPEAT DUP collapses back to REPEAT, so further duplicators can follow
+ {
+ handle_.pop ();
+ assert (handle_.top ()._type == token::DUP);
+ token_stack_.push (token::REPEAT);
+ }
+ }
+
+ static void charset (token_stack &handle_, token_stack &token_stack_, // Turns a CHARSET token into a leaf node and pushes REPEAT.
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
+ {
+ assert (handle_.top ()._type == token::CHARSET &&
+ handle_.size () == 1);
+ // store charset
+ node_ptr_vector_->push_back (0); // slot is added before the node is allocated
+
+ const size_t id_ = handle_.top ()._id; // id stored in the CHARSET token
+
+ node_ptr_vector_->back () = new leaf_node (id_, true);
+ tree_node_stack_.push (node_ptr_vector_->back ());
+ token_stack_.push (token::REPEAT);
+ }
+
+ static void macro (token_stack &handle_, token_stack &token_stack_, // Expands a MACRO token by copying the tree stored under its name.
+ const macro_map &macromap_, node_ptr_vector &node_ptr_vector_,
+ tree_node_stack &tree_node_stack_)
+ {
+ token &top_ = handle_.top ();
+
+ assert (top_._type == token::MACRO && handle_.size () == 1);
+
+ typename macro_map::const_iterator iter_ =
+ macromap_.find (top_._macro);
+
+ if (iter_ == macromap_.end ()) // unknown name: build a narrow-character message and throw runtime_error
+ {
+ const CharT *name_ = top_._macro;
+ std::basic_stringstream<CharT> ss_; // used only for its narrow () member
+ std::ostringstream os_;
+
+ os_ << "Unknown MACRO name '";
+
+ while (*name_)
+ {
+ os_ << ss_.narrow (*name_++, ' '); // characters that cannot be narrowed are written as ' '
+ }
+
+ os_ << "'.";
+ throw runtime_error (os_.str ());
+ }
+
+ tree_node_stack_.push (iter_->second->copy (node_ptr_vector_)); // the stored tree is copied, not shared
+ token_stack_.push (token::REPEAT);
+ }
+
+ static void openparen (token_stack &handle_, token_stack &token_stack_) // Reduces '(' REGEX ')' to REPEAT; the tree stack is left untouched.
+ {
+ assert (handle_.top ()._type == token::OPENPAREN &&
+ handle_.size () == 3);
+ handle_.pop ();
+ assert (handle_.top ()._type == token::REGEX);
+ handle_.pop ();
+ assert (handle_.top ()._type == token::CLOSEPAREN);
+ token_stack_.push (token::REPEAT);
+ }
+
+ static void perform_or (node_ptr_vector &node_ptr_vector_, // Replaces the two topmost trees with one selection node.
+ tree_node_stack &tree_node_stack_)
+ {
+ // perform or
+ node *rhs_ = tree_node_stack_.top ();
+
+ tree_node_stack_.pop ();
+
+ node *lhs_ = tree_node_stack_.top (); // left operand stays on the stack and is overwritten below
+
+ node_ptr_vector_->push_back (0); // slot is added before the node is allocated
+ node_ptr_vector_->back () = new selection_node (lhs_, rhs_);
+ tree_node_stack_.top () = node_ptr_vector_->back ();
+ }
+
+ static void sequence (node_ptr_vector &node_ptr_vector_, // Replaces the two topmost trees with one sequence node.
+ tree_node_stack &tree_node_stack_)
+ {
+ node *rhs_ = tree_node_stack_.top ();
+
+ tree_node_stack_.pop ();
+
+ node *lhs_ = tree_node_stack_.top (); // left operand stays on the stack and is overwritten below
+
+ node_ptr_vector_->push_back (0); // slot is added before the node is allocated
+ node_ptr_vector_->back () = new sequence_node (lhs_, rhs_);
+ tree_node_stack_.top () = node_ptr_vector_->back ();
+ }
+
+ static void optional (const bool greedy_, // Replaces the topmost tree X with a selection of X and a null_token leaf.
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
+ {
+ // perform ?
+ node *lhs_ = tree_node_stack_.top ();
+ // You don't know if lhs_ is a leaf_node, so get firstpos.
+ node::node_vector &firstpos_ = lhs_->firstpos();
+
+ for (node::node_vector::iterator iter_ = firstpos_.begin (),
+ end_ = firstpos_.end (); iter_ != end_; ++iter_)
+ {
+ // These are leaf_nodes!
+ (*iter_)->greedy (greedy_); // propagate the greedy flag to every first-position leaf
+ }
+
+ node_ptr_vector_->push_back (0); // slot is added before the node is allocated
+
+ node *rhs_ = new leaf_node (null_token, greedy_); // the alternative branch is a null_token leaf
+
+ node_ptr_vector_->back () = rhs_;
+ node_ptr_vector_->push_back (0);
+ node_ptr_vector_->back () = new selection_node (lhs_, rhs_);
+ tree_node_stack_.top () = node_ptr_vector_->back ();
+ }
+
+ static void zero_or_more (const bool greedy_, // Wraps the topmost tree in an iteration node.
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
+ {
+ // perform *
+ node *ptr_ = tree_node_stack_.top ();
+
+ node_ptr_vector_->push_back (0); // slot is added before the node is allocated
+ node_ptr_vector_->back () = new iteration_node (ptr_, greedy_);
+ tree_node_stack_.top () = node_ptr_vector_->back ();
+ }
+
+ static void one_or_more (const bool greedy_, // Replaces the topmost tree X with the sequence X, iteration (copy of X).
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
+ {
+ // perform +
+ node *lhs_ = tree_node_stack_.top ();
+ node *copy_ = lhs_->copy (node_ptr_vector_); // the iterated part is a separate copy of the operand
+
+ node_ptr_vector_->push_back (0); // slot is added before the node is allocated
+
+ node *rhs_ = new iteration_node (copy_, greedy_);
+
+ node_ptr_vector_->back () = rhs_;
+ node_ptr_vector_->push_back (0);
+ node_ptr_vector_->back () = new sequence_node (lhs_, rhs_);
+ tree_node_stack_.top () = node_ptr_vector_->back ();
+ }
+
+ static void repeatn (const bool greedy_, const token &token_, // Expands {n[,[m]]} on the topmost tree using copies, sequence (), optional () and zero_or_more ().
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_) // token_ supplies _min, _comma and _max
+ {
+ // perform {n[,[m]]}
+ // Semantic checks have already been performed.
+ // {0,} = *
+ // {0,1} = ?
+ // {1,} = +
+ // therefore we do not check for these cases.
+ if (!(token_._min == 1 && !token_._comma)) // {1} leaves the operand unchanged
+ {
+ const std::size_t top_ = token_._min > 0 ?
+ token_._min : token_._max; // loop bound: min when non-zero, otherwise max
+
+ if (token_._min == 0) // {0,m}: make the operand itself optional first
+ {
+ optional (greedy_, node_ptr_vector_, tree_node_stack_);
+ }
+
+ node *prev_ = tree_node_stack_.top ()->copy (node_ptr_vector_);
+ node *curr_ = 0;
+
+ for (std::size_t i_ = 2; i_ < top_; ++i_) // each pass copies prev_ and appends the old prev_ to the tree on the stack
+ {
+ node *temp_ = prev_->copy (node_ptr_vector_);
+
+ curr_ = temp_;
+ tree_node_stack_.push (0);
+ tree_node_stack_.top () = prev_;
+ sequence (node_ptr_vector_, tree_node_stack_);
+ prev_ = curr_;
+ }
+
+ if (token_._comma && token_._min > 0) // {n,} or {n,m} with n > 0
+ {
+ if (token_._min > 1)
+ {
+ node *temp_ = prev_->copy (node_ptr_vector_);
+
+ curr_ = temp_;
+ tree_node_stack_.push (0);
+ tree_node_stack_.top () = prev_;
+ sequence (node_ptr_vector_, tree_node_stack_);
+ prev_ = curr_;
+ }
+
+ if (token_._comma && token_._max) // {n,m}: the remaining copies are optional
+ {
+ tree_node_stack_.push (0);
+ tree_node_stack_.top () = prev_;
+ optional (greedy_, node_ptr_vector_, tree_node_stack_);
+
+ node *temp_ = tree_node_stack_.top ();
+
+ tree_node_stack_.pop ();
+ prev_ = temp_;
+
+ const std::size_t count_ = token_._max - token_._min;
+
+ for (std::size_t i_ = 1; i_ < count_; ++i_)
+ {
+ node *temp_ = prev_->copy (node_ptr_vector_); // shadows the outer temp_
+
+ curr_ = temp_;
+ tree_node_stack_.push (0);
+ tree_node_stack_.top () = prev_;
+ sequence (node_ptr_vector_, tree_node_stack_);
+ prev_ = curr_;
+ }
+ }
+ else // {n,}: the tail is a zero-or-more iteration
+ {
+ tree_node_stack_.push (0);
+ tree_node_stack_.top () = prev_;
+ zero_or_more (greedy_, node_ptr_vector_, tree_node_stack_);
+
+ node *temp_ = tree_node_stack_.top ();
+
+ prev_ = temp_;
+ tree_node_stack_.pop ();
+ }
+ }
+
+ tree_node_stack_.push (0);
+ tree_node_stack_.top () = prev_;
+ sequence (node_ptr_vector_, tree_node_stack_); // append the final piece to the tree on the stack
+ }
+ }
+};
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/num_token.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/num_token.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,147 @@
+// num_token.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_NUM_TOKEN_HPP
+#define BOOST_LEXER_NUM_TOKEN_HPP
+
+#include <boost/config.hpp>
+#include "../../consts.hpp" // null_token
+#include "../../size_t.hpp"
+#include <boost/detail/workaround.hpp>
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+template<typename CharT>
+struct basic_num_token // Token used by the regex parser: a type plus optional id, repeat bounds or macro name.
+{
+ enum type {BEGIN, REGEX, OREXP, SEQUENCE, SUB, EXPRESSION, REPEAT,
+ DUP, OR, CHARSET, MACRO, OPENPAREN, CLOSEPAREN, OPT, AOPT,
+ ZEROORMORE, AZEROORMORE, ONEORMORE, AONEORMORE, REPEATN, AREPEATN,
+ END}; // the enumerator order is the row/column order of _precedence_table
+
+ type _type;
+ std::size_t _id; // null_token unless set explicitly
+ std::size_t _min; // repeat bounds, set through min_max ()
+ bool _comma;
+ std::size_t _max;
+ CharT _macro[max_macro_len + 1]; // zero-terminated macro name
+ static const char _precedence_table[END + 1][END + 1];
+ static const char *_precedence_strings[END + 1];
+
+ basic_num_token (const type type_ = BEGIN, // Defaults to a BEGIN token with id null_token.
+ const std::size_t id_ = null_token) :
+ _type (type_),
+ _id (id_),
+ _min (0),
+ _comma (false),
+ _max (0)
+ {
+ *_macro = 0; // empty macro name
+ }
+
+ basic_num_token &operator = (const basic_num_token &rhs_) // Copies all scalar fields; the macro name only for MACRO tokens.
+ {
+ _type = rhs_._type;
+ _id = rhs_._id;
+ _min = rhs_._min;
+ _comma = rhs_._comma;
+ _max = rhs_._max;
+
+ if (_type == MACRO) // for other types _macro keeps its previous content
+ {
+ const CharT *read_ = rhs_._macro;
+ CharT *write_ = _macro;
+
+ while (*read_)
+ {
+ *write_++ = *read_++;
+ }
+
+ *write_ = 0;
+ }
+
+ return *this;
+ }
+
+ void set (const type type_) // Sets the type and resets the id to null_token.
+ {
+ _type = type_;
+ _id = null_token;
+ }
+
+ void set (const type type_, const std::size_t id_) // Sets the type and the id.
+ {
+ _type = type_;
+ _id = id_;
+ }
+
+ void min_max (const std::size_t min_, const bool comma_, // Stores the repeat bounds and whether a comma was present.
+ const std::size_t max_)
+ {
+ _min = min_;
+ _comma = comma_;
+ _max = max_;
+ }
+
+ char precedence (const type type_) const // Table entry for this token (row) against type_ (column).
+ {
+ return _precedence_table[_type][type_];
+ }
+
+ const char *precedence_string () const // Printable name of this token's type.
+ {
+ return _precedence_strings[_type];
+ }
+};
+
+template<typename CharT>
+const char basic_num_token<CharT>::_precedence_table[END + 1][END + 1] = {
+// BEG, REG, ORE, SEQ, SUB, EXP, RPT, DUP, | , CHR, MCR, ( , ) , ? , ?? , * , *? , + , +?, {n}, {n}?, END
+/*BEGIN*/{' ', '<', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*REGEX*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*OREXP*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* SEQ */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* SUB */{' ', ' ', ' ', ' ', ' ', '=', '<', ' ', '>', '<', '<', '<', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*EXPRE*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* RPT */{' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', '>', '<', '<', '<', '<', '<', '<', '<', '<', '>'},
+/*DUPLI*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* | */{' ', ' ', ' ', '=', '<', '<', '<', ' ', ' ', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '},
+/*CHARA*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/*MACRO*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/* ( */{' ', '=', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '},
+/* ) */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/* ? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* ?? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* * */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* *? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* + */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* +? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*{n,m}*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*{nm}?*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* END */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '}
+};
+
+template<typename CharT>
+const char *basic_num_token<CharT>::_precedence_strings[END + 1] = // Printable names, one per enumerator of type, in enum order.
+#if BOOST_WORKAROUND(BOOST_INTEL_CXX_VERSION, BOOST_TESTED_AT(910))
+{{"BEGIN"}, {"REGEX"}, {"OREXP"}, {"SEQUENCE"}, {"SUB"}, {"EXPRESSION"},
+ {"REPEAT"}, {"DUPLICATE"}, {"|"}, {"CHARSET"}, {"MACRO"},
+ {"("}, {")"}, {"?"}, {"??"}, {"*"}, {"*?"}, {"+"}, {"+?"}, {"{n[,[m]]}"},
+ {"{n[,[m]]}?"}, {"END"}
+}; // both branches now end the definition with ';'
+#else
+{"BEGIN", "REGEX", "OREXP", "SEQUENCE", "SUB", "EXPRESSION", "REPEAT",
+ "DUPLICATE", "|", "CHARSET", "MACRO", "(", ")", "?", "??", "*", "*?",
+ "+", "+?", "{n[,[m]]}", "{n[,[m]]}?", "END"};
+#endif
+} // namespace detail - closed after #endif so both preprocessor branches stay balanced
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,471 @@
+// tokeniser.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_RE_TOKENISER_HPP
+#define BOOST_LEXER_RE_TOKENISER_HPP
+
+// memcpy()
+#include <cstring>
+#include <map>
+#include "num_token.hpp"
+#include "../../runtime_error.hpp"
+#include "../../size_t.hpp"
+#include <sstream>
+#include "../../string_token.hpp"
+#include "re_tokeniser_helper.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+// Splits the text of a regular expression into num_tokens.
+// Every member is static: the read position and the options live in the
+// state object that is passed to each call.
+template<typename CharT>
+class basic_re_tokeniser
+{
+public:
+ typedef basic_num_token<CharT> num_token;
+ typedef basic_re_tokeniser_state<CharT> state;
+ typedef basic_string_token<CharT> string_token;
+ typedef typename string_token::string string;
+ // Maps each distinct charset seen so far to the id handed out for it.
+ typedef std::map<string_token, std::size_t> token_map;
+ typedef std::pair<string_token, std::size_t> token_pair;
+
+ // Reads the next token from state_ and stores it in token_.
+ // Charsets are registered in map_ (see create_charset_token()).
+ // Throws runtime_error on an unterminated string, on an unmatched ')'
+ // and on any error raised by the helpers called from here.
+ static void next (state &state_, token_map &map_, num_token &token_)
+ {
+ CharT ch_ = 0;
+ bool eos_ = state_.next (ch_);
+
+ // Presumably clears the repeat counts left over from the previous
+ // token - TODO confirm against num_token::min_max().
+ token_.min_max (0, false, 0);
+
+ // Each double quote toggles string mode and is then skipped.
+ while (!eos_ && ch_ == '"')
+ {
+ state_._in_string ^= 1;
+ eos_ = state_.next (ch_);
+ }
+
+ if (eos_)
+ {
+ if (state_._in_string)
+ {
+ throw runtime_error ("Unexpected end of regex "
+ "(missing '\"').");
+ }
+
+ token_.set (num_token::END, null_token);
+ }
+ else
+ {
+ if (ch_ == '\\')
+ {
+ // Even if we are in a string, respect escape sequences...
+ escape (state_, map_, token_);
+ }
+ else if (state_._in_string)
+ {
+ // All other meta characters lose their special meaning
+ // inside a string.
+ create_charset_token (string (1, ch_), false, map_, token_);
+ }
+ else
+ {
+ // Not an escape sequence and not inside a string, so
+ // check for meta characters.
+ switch (ch_)
+ {
+ case '(':
+ token_.set (num_token::OPENPAREN, null_token);
+ ++state_._paren_count;
+ break;
+ case ')':
+ --state_._paren_count;
+
+ // More ')' than '(' so far.
+ if (state_._paren_count < 0)
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Number of open parenthesis < 0 at index " <<
+ state_._index - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+
+ token_.set (num_token::CLOSEPAREN, null_token);
+ break;
+ case '?':
+ // A directly following '?' selects the A* variant of the
+ // operator and is consumed; same pattern for '*' and '+'.
+ if (!state_.eos () && *state_._curr == '?')
+ {
+ token_.set (num_token::AOPT, null_token);
+ state_.increment ();
+ }
+ else
+ {
+ token_.set (num_token::OPT, null_token);
+ }
+
+ break;
+ case '*':
+ if (!state_.eos () && *state_._curr == '?')
+ {
+ token_.set (num_token::AZEROORMORE, null_token);
+ state_.increment ();
+ }
+ else
+ {
+ token_.set (num_token::ZEROORMORE, null_token);
+ }
+
+ break;
+ case '+':
+ if (!state_.eos () && *state_._curr == '?')
+ {
+ token_.set (num_token::AONEORMORE, null_token);
+ state_.increment ();
+ }
+ else
+ {
+ token_.set (num_token::ONEORMORE, null_token);
+ }
+
+ break;
+ case '{':
+ // Either a repeat count or a macro reference.
+ open_curly (state_, token_);
+ break;
+ case '|':
+ token_.set (num_token::OR, null_token);
+ break;
+ case '^':
+ // Emitted as a CHARSET token carrying the reserved bol_token id;
+ // the state remembers that one was seen.
+ token_.set (num_token::CHARSET, bol_token);
+ state_._seen_BOL_assertion = true;
+ break;
+ case '$':
+ // As for '^', but with the reserved eol_token id.
+ token_.set (num_token::CHARSET, eol_token);
+ state_._seen_EOL_assertion = true;
+ break;
+ case '.':
+ {
+ // '.' becomes a negated charset: negated "\n" when
+ // _dot_not_newline is set, otherwise negated empty set.
+ string dot_;
+
+ if (state_._dot_not_newline)
+ {
+ dot_ = '\n';
+ }
+
+ create_charset_token (dot_, true, map_, token_);
+ break;
+ }
+ case '[':
+ {
+ charset (state_, map_, token_);
+ break;
+ }
+ default:
+ // Ordinary character. When matching case-insensitively a letter
+ // becomes a two character set holding both of its cases.
+ if (!state_._case_sensitive &&
+ (std::isupper (ch_, state_._locale) ||
+ std::islower (ch_, state_._locale)))
+ {
+ CharT upper_ = std::toupper (ch_, state_._locale);
+ CharT lower_ = std::tolower (ch_, state_._locale);
+
+ string str_ (1, upper_);
+
+ str_ += lower_;
+ create_charset_token (str_, false, map_, token_);
+ }
+ else
+ {
+ create_charset_token (string (1, ch_), false,
+ map_, token_);
+ }
+
+ break;
+ }
+ }
+ }
+ }
+
+private:
+ typedef basic_re_tokeniser_helper<CharT> tokeniser_helper;
+
+ // Handles the text following a backslash (already consumed by next()).
+ // The helper returns either a bracket expression equivalent to the
+ // escape (non-null str_) or a single decoded character in ch_.
+ static void escape (state &state_, token_map &map_, num_token &token_)
+ {
+ CharT ch_ = 0;
+ std::size_t str_len_ = 0;
+ const CharT *str_ = tokeniser_helper::escape_sequence (state_,
+ ch_, str_len_);
+
+ if (str_)
+ {
+ // Tokenise the replacement text with a temporary state;
+ // str_ + 1 starts it just after the first character of str_.
+ state state2_ (str_ + 1, str_ + str_len_, state_._case_sensitive,
+ state_._locale, state_._dot_not_newline);
+
+ charset (state2_, map_, token_);
+ }
+ else
+ {
+ create_charset_token (string (1, ch_), false, map_, token_);
+ }
+ }
+
+ // Lets the helper collect the characters of a bracket expression and
+ // turns the result into a CHARSET token.
+ static void charset (state &state_, token_map &map_, num_token &token_)
+ {
+ string chars_;
+ bool negated_ = false;
+
+ tokeniser_helper::charset (state_, chars_, negated_);
+ create_charset_token (chars_, negated_, map_, token_);
+ }
+
+ // Sets token_ to a CHARSET token whose id identifies (negated_, charset_).
+ // A charset already present in map_ reuses its id; a new one is inserted
+ // with the current map size as its id.
+ static void create_charset_token (const string &charset_,
+ const bool negated_, token_map &map_, num_token &token_)
+ {
+ std::size_t id_ = null_token;
+ string_token stok_ (negated_, charset_);
+
+ // Bring the token into a canonical form before it is used as a key.
+ stok_.remove_duplicates ();
+ stok_.normalise ();
+
+ typename token_map::const_iterator iter_ = map_.find (stok_);
+
+ if (iter_ == map_.end ())
+ {
+ id_ = map_.size ();
+ map_.insert (token_pair (stok_, id_));
+ }
+ else
+ {
+ id_ = iter_->second;
+ }
+
+ token_.set (num_token::CHARSET, id_);
+ }
+
+ // Called after '{' has been consumed. A digit starts a repeat count,
+ // anything else is treated as a macro name.
+ // Throws runtime_error if the regex ends right after the '{'.
+ static void open_curly (state &state_, num_token &token_)
+ {
+ if (state_.eos ())
+ {
+ throw runtime_error ("Unexpected end of regex "
+ "(missing '}').");
+ }
+ else if (*state_._curr >= '0' && *state_._curr <= '9')
+ {
+ repeat_n (state_, token_);
+
+ // NOTE(review): repeat_n() may already have rewritten the token as
+ // ZEROORMORE, ONEORMORE or OPT; a trailing '?' still overwrites the
+ // type with AREPEATN here - confirm the parser expects that.
+ if (!state_.eos () && *state_._curr == '?')
+ {
+ token_._type = num_token::AREPEATN;
+ state_.increment ();
+ }
+ }
+ else
+ {
+ macro (state_, token_);
+ }
+ }
+
+ // SYNTAX:
+ // {n[,[n]]}
+ // SEMANTIC RULES:
+ // {0} - INVALID (throw exception)
+ // {0,} = *
+ // {0,0} - INVALID (throw exception)
+ // {0,1} = ?
+ // {1,} = +
+ // {min,max} where min == max - {min}
+ // {min,max} where max < min - INVALID (throw exception)
+ //
+ // The counts are accumulated into token_._min / token_._max, which
+ // next() resets through min_max() before each token. No overflow check
+ // is made on the accumulated values.
+ static void repeat_n (state &state_, num_token &token_)
+ {
+ CharT ch_ = 0;
+ bool eos_ = state_.next (ch_);
+
+ // Decimal digits of the minimum.
+ while (!eos_ && ch_ >= '0' && ch_ <= '9')
+ {
+ token_._min *= 10;
+ token_._min += ch_ - '0';
+ eos_ = state_.next (ch_);
+ }
+
+ if (eos_)
+ {
+ throw runtime_error ("Unexpected end of regex "
+ "(missing '}').");
+ }
+
+ // min_max_: an explicit maximum was given and differs from min.
+ // repeatn_: the token is still a REPEATN (not rewritten to *, + or ?).
+ bool min_max_ = false;
+ bool repeatn_ = true;
+
+ token_._comma = ch_ == ',';
+
+ if (token_._comma)
+ {
+ eos_ = state_.next (ch_);
+
+ if (eos_)
+ {
+ throw runtime_error ("Unexpected end of regex "
+ "(missing '}').");
+ }
+
+ if (ch_ == '}')
+ {
+ // Small optimisation: Check for '*' equivalency.
+ if (token_._min == 0)
+ {
+ token_.set (num_token::ZEROORMORE, null_token);
+ repeatn_ = false;
+ }
+ // Small optimisation: Check for '+' equivalency.
+ else if (token_._min == 1)
+ {
+ token_.set (num_token::ONEORMORE, null_token);
+ repeatn_ = false;
+ }
+ }
+ else
+ {
+ // Whatever follows the comma must start the maximum.
+ if (ch_ < '0' || ch_ > '9')
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Missing '}' at index " << state_._index - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+
+ min_max_ = true;
+
+ // Decimal digits of the maximum (at least one, checked above).
+ do
+ {
+ token_._max *= 10;
+ token_._max += ch_ - '0';
+ eos_ = state_.next (ch_);
+ } while (!eos_ && ch_ >= '0' && ch_ <= '9');
+
+ if (eos_)
+ {
+ throw runtime_error ("Unexpected end of regex "
+ "(missing '}').");
+ }
+
+ // Small optimisation: Check for '?' equivalency.
+ if (token_._min == 0 && token_._max == 1)
+ {
+ token_.set (num_token::OPT, null_token);
+ repeatn_ = false;
+ }
+ // Small optimisation: if min == max, then min.
+ else if (token_._min == token_._max)
+ {
+ token_._comma = false;
+ min_max_ = false;
+ token_._max = 0;
+ }
+ }
+ }
+
+ // On every path ch_ now holds the character that must be the '}'.
+ if (ch_ != '}')
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Missing '}' at index " << state_._index - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+
+ if (repeatn_)
+ {
+ // SEMANTIC VALIDATION follows:
+ // NOTE: {0,} has already become *
+ // therefore we don't check for a comma.
+ if (token_._min == 0 && token_._max == 0)
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Cannot have exactly zero repeats preceding index " <<
+ state_._index << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+
+ if (min_max_ && token_._max < token_._min)
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Max less than min preceding index " << state_._index
+ << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+
+ token_.set (num_token::REPEATN, null_token);
+ }
+ }
+
+ // Reads a macro reference "{NAME}" ('{' already consumed) and stores
+ // the name, zero terminated, in token_._macro.
+ // NAME starts with '_' or an ASCII letter and continues with '_', '-',
+ // ASCII letters or digits.
+ // Throws runtime_error on an invalid first character, a missing '}',
+ // end of input, or a name longer than max_macro_len.
+ static void macro (state &state_, num_token &token_)
+ {
+ CharT ch_ = 0;
+ bool eos_ = false;
+ const CharT *start_ = state_._curr;
+
+ // The caller (open_curly) has already checked for end of input.
+ state_.next (ch_);
+
+ if (ch_ != '_' && !(ch_ >= 'A' && ch_ <= 'Z') &&
+ !(ch_ >= 'a' && ch_ <= 'z'))
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Invalid MACRO name at index " <<
+ state_._index - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+
+ // Consume the rest of the name; the loop stops on the first
+ // character that cannot be part of it. (&& binds tighter than ||,
+ // so each range test below is evaluated as a unit.)
+ do
+ {
+ eos_ = state_.next (ch_);
+
+ if (eos_)
+ {
+ throw runtime_error ("Unexpected end of regex "
+ "(missing '}').");
+ }
+ } while (ch_ == '_' || ch_ == '-' || ch_ >= 'A' && ch_ <= 'Z' ||
+ ch_ >= 'a' && ch_ <= 'z' || ch_ >= '0' && ch_ <= '9');
+
+ if (ch_ != '}')
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Missing '}' at index " << state_._index - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+
+ // _curr is one past the '}', so the name is [start_, _curr - 1).
+ std::size_t len_ = state_._curr - 1 - start_;
+
+ if (len_ > max_macro_len)
+ {
+ // ss_ is only used for narrow(), to print the name via a
+ // char based stream whatever CharT is.
+ std::basic_stringstream<CharT> ss_;
+ std::ostringstream os_;
+
+ os_ << "MACRO name '";
+
+ while (len_)
+ {
+ os_ << ss_.narrow (*start_++, ' ');
+ --len_;
+ }
+
+ os_ << "' too long.";
+ throw runtime_error (os_.str ());
+ }
+
+ token_.set (num_token::MACRO, null_token);
+
+ // Some systems have memcpy in namespace std.
+ using namespace std;
+
+ // Assumes token_._macro has room for max_macro_len characters plus
+ // the terminator - TODO confirm against num_token.
+ memcpy (token_._macro, start_, len_ * sizeof (CharT));
+ token_._macro[len_] = 0;
+ }
+};
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,550 @@
+// tokeniser_helper.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_RE_TOKENISER_HELPER_H
+#define BOOST_LEXER_RE_TOKENISER_HELPER_H
+
+#include "../../char_traits.hpp"
+// strlen()
+#include <cstring>
+#include "../../size_t.hpp"
+#include "re_tokeniser_state.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+// Static helpers shared by the regex tokeniser: decoding of escape
+// sequences and collection of the characters of a bracket expression.
+// NOTE(review): this class uses runtime_error, std::ostringstream and the
+// std::locale character functions although only <cstring> and project
+// headers are included at the top of this file - presumably it relies on
+// the including file; verify.
+template<typename CharT, typename Traits = char_traits<CharT> >
+class basic_re_tokeniser_helper
+{
+public:
+ typedef basic_re_tokeniser_state<CharT> state;
+ typedef std::basic_string<CharT> string;
+
+ // Decodes what follows a backslash (the backslash itself has already
+ // been consumed by the caller).
+ // For the shortcuts d, D, s, S, w and W the letter is consumed and a
+ // pointer to the equivalent bracket expression is returned, with its
+ // length in str_len_. For anything else the escaped character is decoded
+ // into ch_, 0 is returned and str_len_ is 0.
+ // Throws runtime_error if the input ends after the backslash.
+ static const CharT *escape_sequence (state &state_, CharT &ch_,
+ std::size_t &str_len_)
+ {
+ bool eos_ = state_.eos ();
+
+ if (eos_)
+ {
+ throw runtime_error ("Unexpected end of regex "
+ "following '\\'.");
+ }
+
+ const CharT *str_ = charset_shortcut (*state_._curr, str_len_);
+
+ if (str_)
+ {
+ state_.increment ();
+ }
+ else
+ {
+ ch_ = chr (state_);
+ }
+
+ return str_;
+ }
+
+ // Collects the characters of a bracket expression whose '[' has already
+ // been consumed. The characters are appended to chars_ and negated_ is
+ // set when the expression starts with '^'.
+ // Throws runtime_error on premature end of input, on a shortcut whose
+ // negation differs from the enclosing expression, on an invalid range
+ // and on an empty, non-negated charset.
+ // This function can call itself.
+ static void charset (state &state_, string &chars_, bool &negated_)
+ {
+ CharT ch_ = 0;
+ bool eos_ = state_.next (ch_);
+
+ if (eos_)
+ {
+ // Pointless returning index if at end of string
+ throw runtime_error ("Unexpected end of regex "
+ "following '['.");
+ }
+
+ negated_ = ch_ == '^';
+
+ if (negated_)
+ {
+ eos_ = state_.next (ch_);
+
+ if (eos_)
+ {
+ // Pointless returning index if at end of string
+ throw runtime_error ("Unexpected end of regex "
+ "following '^'.");
+ }
+ }
+
+ // chset_: the item just read was a shortcut that expanded to a whole
+ // set, so there is no single character in prev_ to add.
+ // prev_: the most recent single character, not yet added to chars_.
+ bool chset_ = false;
+ CharT prev_ = 0;
+
+ while (ch_ != ']')
+ {
+ if (ch_ == '\\')
+ {
+ std::size_t str_len_ = 0;
+ const CharT *str_ = escape_sequence (state_, prev_, str_len_);
+
+ chset_ = str_ != 0;
+
+ if (chset_)
+ {
+ // Expand the shortcut by parsing its replacement text
+ // (starting after its first character) recursively.
+ state temp_state_ (str_ + 1, str_ + str_len_,
+ state_._case_sensitive, state_._locale,
+ state_._dot_not_newline);
+ string temp_chars_;
+ bool temp_negated_ = false;
+
+ charset (temp_state_, temp_chars_, temp_negated_);
+
+ // chars_ carries one negation flag for the whole expression,
+ // so a shortcut with the opposite sense is rejected.
+ if (negated_ != temp_negated_)
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Mismatch in charset negation preceding "
+ "index " << state_._index - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+
+ chars_ += temp_chars_;
+ }
+ }
+/*
+ else if (ch_ == '[' && !state_.eos () && *state_._curr == ':')
+ {
+ // TODO: POSIX charsets
+ }
+*/
+ else
+ {
+ chset_ = false;
+ prev_ = ch_;
+ }
+
+ // Look at the following character to decide whether prev_ starts a range.
+ eos_ = state_.next (ch_);
+
+ // Covers preceding if, else if and else
+ if (eos_)
+ {
+ // Pointless returning index if at end of string
+ throw runtime_error ("Unexpected end of regex "
+ "(missing ']').");
+ }
+
+ if (ch_ == '-')
+ {
+ // Adds prev_ .. <range end> to chars_ and reads the character
+ // after the range into ch_.
+ charset_range (chset_, state_, eos_, ch_, prev_, chars_);
+ }
+ else if (!chset_)
+ {
+ // Single character: add both cases when matching case-insensitively.
+ if (!state_._case_sensitive &&
+ (std::isupper (prev_, state_._locale) ||
+ std::islower (prev_, state_._locale)))
+ {
+ CharT upper_ = std::toupper (prev_, state_._locale);
+ CharT lower_ = std::tolower (prev_, state_._locale);
+
+ chars_ += upper_;
+ chars_ += lower_;
+ }
+ else
+ {
+ chars_ += prev_;
+ }
+ }
+ }
+
+ if (!negated_ && chars_.empty ())
+ {
+ throw runtime_error ("Empty charsets not allowed.");
+ }
+ }
+
+private:
+ // Returns the bracket expression equivalent to the shortcut letter ch_
+ // (d, D, s, S, w, W) and its length in str_len_, or 0 and a length of 0
+ // when ch_ is not a shortcut. Narrow character version.
+ static const char *charset_shortcut (const char ch_,
+ std::size_t &str_len_)
+ {
+ const char *str_ = 0;
+
+ switch (ch_)
+ {
+ case 'd':
+ str_ = "[0-9]";
+ break;
+ case 'D':
+ str_ = "[^0-9]";
+ break;
+ case 's':
+ str_ = "[ \t\n\r\f\v]";
+ break;
+ case 'S':
+ str_ = "[^ \t\n\r\f\v]";
+ break;
+ case 'w':
+ str_ = "[_0-9A-Za-z]";
+ break;
+ case 'W':
+ str_ = "[^_0-9A-Za-z]";
+ break;
+ }
+
+ if (str_)
+ {
+ // Some systems have strlen in namespace std.
+ using namespace std;
+
+ str_len_ = strlen (str_);
+ }
+ else
+ {
+ str_len_ = 0;
+ }
+
+ return str_;
+ }
+
+ // Wide character version of the function above.
+ static const wchar_t *charset_shortcut (const wchar_t ch_,
+ std::size_t &str_len_)
+ {
+ const wchar_t *str_ = 0;
+
+ switch (ch_)
+ {
+ case 'd':
+ str_ = L"[0-9]";
+ break;
+ case 'D':
+ str_ = L"[^0-9]";
+ break;
+ case 's':
+ str_ = L"[ \t\n\r\f\v]";
+ break;
+ case 'S':
+ str_ = L"[^ \t\n\r\f\v]";
+ break;
+ case 'w':
+ str_ = L"[_0-9A-Za-z]";
+ break;
+ case 'W':
+ str_ = L"[^_0-9A-Za-z]";
+ break;
+ }
+
+ if (str_)
+ {
+ // Some systems have wcslen in namespace std.
+ using namespace std;
+
+ str_len_ = wcslen (str_);
+ }
+ else
+ {
+ str_len_ = 0;
+ }
+
+ return str_;
+ }
+
+ // Decodes the single character escape at the cursor (the character
+ // after the backslash) and consumes it. Unrecognised characters stand
+ // for themselves.
+ static CharT chr (state &state_)
+ {
+ CharT ch_ = 0;
+
+ // eos_ has already been checked for.
+ switch (*state_._curr)
+ {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ ch_ = decode_octal (state_);
+ break;
+ case 'a':
+ ch_ = '\a';
+ state_.increment ();
+ break;
+ case 'b':
+ ch_ = '\b';
+ state_.increment ();
+ break;
+ case 'c':
+ ch_ = decode_control_char (state_);
+ break;
+ case 'e':
+ ch_ = 27; // '\e' not recognised by compiler
+ state_.increment ();
+ break;
+ case 'f':
+ ch_ = '\f';
+ state_.increment ();
+ break;
+ case 'n':
+ ch_ = '\n';
+ state_.increment ();
+ break;
+ case 'r':
+ ch_ = '\r';
+ state_.increment ();
+ break;
+ case 't':
+ ch_ = '\t';
+ state_.increment ();
+ break;
+ case 'v':
+ ch_ = '\v';
+ state_.increment ();
+ break;
+ case 'x':
+ ch_ = decode_hex (state_);
+ break;
+ default:
+ ch_ = *state_._curr;
+ state_.increment ();
+ break;
+ }
+
+ return ch_;
+ }
+
+ // Decodes one to three octal digits starting at the cursor (the caller
+ // guarantees the first one) and returns the value converted to CharT.
+ static CharT decode_octal (state &state_)
+ {
+ std::size_t accumulator_ = 0;
+ CharT ch_ = *state_._curr;
+ unsigned short count_ = 3;
+ bool eos_ = false;
+
+ for (;;)
+ {
+ accumulator_ *= 8;
+ accumulator_ += ch_ - '0';
+ --count_;
+ state_.increment ();
+ eos_ = state_.eos ();
+
+ // Stop after three digits or at the end of the input.
+ if (!count_ || eos_) break;
+
+ ch_ = *state_._curr;
+
+ // Don't consume invalid chars!
+ if (ch_ < '0' || ch_ > '7')
+ {
+ break;
+ }
+ }
+
+ return static_cast<CharT> (accumulator_);
+ }
+
+ // Decodes "\cX": ASCII letters of either case map to 1..26 and '@' maps
+ // to 0. Throws runtime_error for any other X or if the input ends.
+ static CharT decode_control_char (state &state_)
+ {
+ // Skip over 'c'
+ state_.increment ();
+
+ CharT ch_ = 0;
+ bool eos_ = state_.next (ch_);
+
+ if (eos_)
+ {
+ // Pointless returning index if at end of string
+ throw runtime_error ("Unexpected end of regex following \\c.");
+ }
+ else
+ {
+ if (ch_ >= 'a' && ch_ <= 'z')
+ {
+ ch_ -= 'a' - 1;
+ }
+ else if (ch_ >= 'A' && ch_ <= 'Z')
+ {
+ ch_ -= 'A' - 1;
+ }
+ else if (ch_ == '@')
+ {
+ // Apparently...
+ ch_ = 0;
+ }
+ else
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Invalid control char at index " <<
+ state_._index - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+ }
+
+ return ch_;
+ }
+
+ // Decodes "\xH...": one or more hex digits of either case; every digit
+ // that follows is consumed. The value is converted to CharT without a
+ // range check. Throws runtime_error if no hex digit follows the 'x'.
+ static CharT decode_hex (state &state_)
+ {
+ // Skip over 'x'
+ state_.increment ();
+
+ CharT ch_ = 0;
+ bool eos_ = state_.next (ch_);
+
+ if (eos_)
+ {
+ // Pointless returning index if at end of string
+ throw runtime_error ("Unexpected end of regex following \\x.");
+ }
+
+ if (!((ch_ >= '0' && ch_ <= '9') || (ch_ >= 'a' && ch_ <= 'f') ||
+ (ch_ >= 'A' && ch_ <= 'F')))
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Illegal char following \\x at index " <<
+ state_._index - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+
+ std::size_t hex_ = 0;
+
+ do
+ {
+ hex_ *= 16;
+
+ if (ch_ >= '0' && ch_ <= '9')
+ {
+ hex_ += ch_ - '0';
+ }
+ else if (ch_ >= 'a' && ch_ <= 'f')
+ {
+ hex_ += 10 + (ch_ - 'a');
+ }
+ else
+ {
+ hex_ += 10 + (ch_ - 'A');
+ }
+
+ eos_ = state_.eos ();
+
+ if (!eos_)
+ {
+ // Peek first: only a further hex digit is consumed.
+ ch_ = *state_._curr;
+
+ // Don't consume invalid chars!
+ if (((ch_ >= '0' && ch_ <= '9') ||
+ (ch_ >= 'a' && ch_ <= 'f') || (ch_ >= 'A' && ch_ <= 'F')))
+ {
+ state_.increment ();
+ }
+ else
+ {
+ // Not really end of input: eos_ doubles as the loop exit flag.
+ eos_ = true;
+ }
+ }
+ } while (!eos_);
+
+ return static_cast<CharT> (hex_);
+ }
+
+ // Handles "prev_-X" inside a bracket expression; the '-' has already
+ // been consumed. Every character from prev_ to X inclusive is appended
+ // to chars_ (both cases of letters when matching case-insensitively).
+ // On return ch_ holds the character following the range and eos_ is
+ // false. Throws runtime_error when either end of the range is a charset
+ // shortcut, when the input ends, or when X sorts before prev_.
+ static void charset_range (const bool chset_, state &state_, bool &eos_,
+ CharT &ch_, const CharT prev_, string &chars_)
+ {
+ if (chset_)
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Charset cannot form start of range preceding "
+ "index " << state_._index - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+
+ eos_ = state_.next (ch_);
+
+ if (eos_)
+ {
+ // Pointless returning index if at end of string
+ throw runtime_error ("Unexpected end of regex "
+ "following '-'.");
+ }
+
+ // curr_ receives the character that ends the range.
+ CharT curr_ = 0;
+
+ if (ch_ == '\\')
+ {
+ std::size_t str_len_ = 0;
+
+ if (escape_sequence (state_, curr_, str_len_))
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Charset cannot form end of range preceding index "
+ << state_._index << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+ }
+/*
+ else if (ch_ == '[' && !state_.eos () && *state_._curr == ':')
+ {
+ std::ostringstream ss_;
+
+ ss_ << "POSIX char class cannot form end of range at "
+ "index " << state_._index - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+*/
+ else
+ {
+ curr_ = ch_;
+ }
+
+ eos_ = state_.next (ch_);
+
+ // Covers preceding if and else
+ if (eos_)
+ {
+ // Pointless returning index if at end of string
+ throw runtime_error ("Unexpected end of regex "
+ "(missing ']').");
+ }
+
+ // Compare via Traits::index_type - presumably an unsigned type, so
+ // that negative char values order correctly; TODO confirm.
+ std::size_t start_ = static_cast<typename Traits::index_type> (prev_);
+ std::size_t end_ = static_cast<typename Traits::index_type> (curr_);
+
+ // Semantic check
+ if (end_ < start_)
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Invalid range in charset preceding index " <<
+ state_._index - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+
+ chars_.reserve (chars_.size () + (end_ + 1 - start_));
+
+ for (; start_ <= end_; ++start_)
+ {
+ // Note: this local deliberately shadows the ch_ parameter.
+ CharT ch_ = static_cast<CharT> (start_);
+
+ if (!state_._case_sensitive &&
+ (std::isupper (ch_, state_._locale) ||
+ std::islower (ch_, state_._locale)))
+ {
+ CharT upper_ = std::toupper (ch_, state_._locale);
+ CharT lower_ = std::tolower (ch_, state_._locale);
+
+ chars_ += (upper_);
+ chars_ += (lower_);
+ }
+ else
+ {
+ chars_ += (ch_);
+ }
+ }
+ }
+};
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_state.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_state.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,94 @@
+// tokeniser_state.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_RE_TOKENISER_STATE_HPP
+#define BOOST_LEXER_RE_TOKENISER_STATE_HPP
+
+#include <locale>
+#include "../../size_t.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+// Read cursor over the text of a regex together with the options and the
+// bookkeeping flags used while the regex is being tokenised.
+template<typename CharT>
+struct basic_re_tokeniser_state
+{
+ // Next character to be read.
+ const CharT *_curr;
+ // One past the last character. Deliberately not a const pointer:
+ // operator = below has to be able to assign it.
+ const CharT *_end;
+ bool _case_sensitive;
+ std::locale _locale;
+ bool _dot_not_newline;
+ // Number of characters consumed so far (used in error messages).
+ std::size_t _index;
+ long _paren_count;
+ bool _in_string;
+ bool _seen_BOL_assertion;
+ bool _seen_EOL_assertion;
+
+ // Starts reading at regex_ and stops at end_ (exclusive). All counters
+ // and flags start out cleared.
+ basic_re_tokeniser_state (const CharT *regex_, const CharT * const end_,
+ const bool case_sensitive_, const std::locale locale_,
+ const bool dot_not_newline_) :
+ _curr (regex_),
+ _end (end_),
+ _case_sensitive (case_sensitive_),
+ _locale (locale_),
+ _dot_not_newline (dot_not_newline_),
+ _index (0),
+ _paren_count (0),
+ _in_string (false),
+ _seen_BOL_assertion (false),
+ _seen_EOL_assertion (false)
+ {
+ }
+
+ // prevent VC++ 7.1 warning:
+ // Member-wise assignment. Returns a reference to this object (the
+ // previous version returned the pointer 'this' and assigned to a const
+ // member, so it could not be instantiated).
+ const basic_re_tokeniser_state &operator = (const basic_re_tokeniser_state &rhs_)
+ {
+ _curr = rhs_._curr;
+ _end = rhs_._end;
+ _case_sensitive = rhs_._case_sensitive;
+ _locale = rhs_._locale;
+ _dot_not_newline = rhs_._dot_not_newline;
+ _index = rhs_._index;
+ _paren_count = rhs_._paren_count;
+ _in_string = rhs_._in_string;
+ _seen_BOL_assertion = rhs_._seen_BOL_assertion;
+ _seen_EOL_assertion = rhs_._seen_EOL_assertion;
+ return *this;
+ }
+
+ // Reads the next character into ch_ and advances.
+ // Returns true at the end of the input (ch_ is then set to 0 and
+ // nothing is consumed), false otherwise.
+ inline bool next (CharT &ch_)
+ {
+ if (_curr >= _end)
+ {
+ ch_ = 0;
+ return true;
+ }
+ else
+ {
+ ch_ = *_curr;
+ increment ();
+ return false;
+ }
+ }
+
+ // Advances by one character without any bounds check.
+ inline void increment ()
+ {
+ ++_curr;
+ ++_index;
+ }
+
+ // True once every character has been consumed.
+ inline bool eos () const
+ {
+ return _curr >= _end;
+ }
+};
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,82 @@
+// end_node.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_END_NODE_HPP
+#define BOOST_LEXER_END_NODE_HPP
+
+#include "node.hpp"
+#include "../../size_t.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+// Tree node that reports end_state() == true and carries the id and the
+// lexer state it was constructed with. It is never nullable and is the
+// only entry of its own firstpos and lastpos sets.
+class end_node : public node
+{
+public:
+ end_node (const std::size_t id_, const std::size_t lexer_state_) :
+ node (false),
+ _id (id_),
+ _lexer_state (lexer_state_)
+ {
+ // Register this node as its own first and last position.
+ firstpos ().push_back (this);
+ lastpos ().push_back (this);
+ }
+
+ virtual ~end_node ()
+ {
+ }
+
+ virtual type what_type () const
+ {
+ return END;
+ }
+
+ // Has no children, so there is nothing to descend into.
+ virtual bool traverse (const_node_stack &/*node_stack_*/,
+ bool_stack &/*perform_op_stack_*/) const
+ {
+ return false;
+ }
+
+ // Nothing in this class ever adds to _followpos, so the returned
+ // vector is always empty.
+ virtual const node_vector &followpos () const
+ {
+ return _followpos;
+ }
+
+ virtual bool end_state () const
+ {
+ return true;
+ }
+
+ virtual std::size_t id () const
+ {
+ return _id;
+ }
+
+ virtual std::size_t lexer_state () const
+ {
+ return _lexer_state;
+ }
+
+private:
+ std::size_t _id;
+ std::size_t _lexer_state;
+ node_vector _followpos;
+
+ // Intentionally empty: end_nodes are not copied.
+ virtual void copy_node (node_ptr_vector &/*node_ptr_vector_*/,
+ node_stack &/*new_node_stack_*/, bool_stack &/*perform_op_stack_*/,
+ bool &/*down_*/) const
+ {
+ }
+};
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/parser/tree/iteration_node.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tree/iteration_node.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,90 @@
+// iteration_node.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_ITERATION_NODE_HPP
+#define BOOST_LEXER_ITERATION_NODE_HPP
+
+#include "node.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+// Tree node that wraps a single child node; it is always nullable.
+// Construction links every last position of the child back to every first
+// position and passes the greedy flag on to the first positions.
+class iteration_node : public node
+{
+public:
+ iteration_node (node *next_, const bool greedy_) :
+ node (true),
+ _next (next_),
+ _greedy (greedy_)
+ {
+ // This node's first/last positions are exactly those of the child.
+ _next->append_firstpos (_firstpos);
+ _next->append_lastpos (_lastpos);
+
+ // After any last position, matching may continue at any first one.
+ const node_vector::size_type last_count_ = _lastpos.size ();
+
+ for (node_vector::size_type i_ = 0; i_ < last_count_; ++i_)
+ {
+ _lastpos[i_]->append_followpos (_firstpos);
+ }
+
+ const node_vector::size_type first_count_ = _firstpos.size ();
+
+ for (node_vector::size_type i_ = 0; i_ < first_count_; ++i_)
+ {
+ _firstpos[i_]->greedy (greedy_);
+ }
+ }
+
+ virtual ~iteration_node ()
+ {
+ }
+
+ virtual type what_type () const
+ {
+ return ITERATION;
+ }
+
+ // Schedules the child for a visit and records that this node's own
+ // operation is to be performed afterwards.
+ virtual bool traverse (const_node_stack &node_stack_,
+ bool_stack &perform_op_stack_) const
+ {
+ perform_op_stack_.push (true);
+ node_stack_.push (_next);
+ return true;
+ }
+
+private:
+ // Not owner of this pointer...
+ node *_next;
+ bool _greedy;
+
+ // Replaces the copied child on top of new_node_stack_ with a new
+ // iteration_node wrapping it, or asks for more descent (down_) when the
+ // flag on top of perform_op_stack_ is false. The flag is popped either way.
+ virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+ node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+ bool &down_) const
+ {
+ if (!perform_op_stack_.top ())
+ {
+ down_ = true;
+ }
+ else
+ {
+ node *child_ = new_node_stack_.top ();
+
+ // Reserve the slot before allocating the new node.
+ node_ptr_vector_->push_back (0);
+ node_ptr_vector_->back () = new iteration_node (child_, _greedy);
+ new_node_stack_.top () = node_ptr_vector_->back ();
+ }
+
+ perform_op_stack_.pop ();
+ }
+};
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/parser/tree/leaf_node.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tree/leaf_node.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,107 @@
+// leaf_node.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_LEAF_NODE_HPP
+#define BOOST_LEXER_LEAF_NODE_HPP
+
+#include "../../consts.hpp" // null_token
+#include "node.hpp"
+#include "../../size_t.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+// Tree node holding a single token id. A leaf built from null_token is
+// nullable and has no positions; any other leaf is its own first and last
+// position. Leaves also store a greedy flag and their follow positions.
+class leaf_node : public node
+{
+public:
+ leaf_node (const std::size_t token_, const bool greedy_) :
+ node (token_ == null_token),
+ _token (token_),
+ _set_greedy (!greedy_),
+ _greedy (greedy_)
+ {
+ if (nullable ())
+ {
+ return;
+ }
+
+ _firstpos.push_back (this);
+ _lastpos.push_back (this);
+ }
+
+ virtual ~leaf_node ()
+ {
+ }
+
+ // Appends every entry of followpos_ to this leaf's follow positions.
+ virtual void append_followpos (const node_vector &followpos_)
+ {
+ _followpos.insert (_followpos.end (),
+ followpos_.begin (), followpos_.end ());
+ }
+
+ virtual type what_type () const
+ {
+ return LEAF;
+ }
+
+ // Has no children, so there is nothing to descend into.
+ virtual bool traverse (const_node_stack &/*node_stack_*/,
+ bool_stack &/*perform_op_stack_*/) const
+ {
+ return false;
+ }
+
+ virtual std::size_t token () const
+ {
+ return _token;
+ }
+
+ // Stores greedy_ only while _set_greedy is still false; the first call
+ // that does so sets _set_greedy, after which further calls are ignored.
+ // (A leaf constructed as non-greedy starts with _set_greedy == true.)
+ virtual void greedy (const bool greedy_)
+ {
+ if (_set_greedy)
+ {
+ return;
+ }
+
+ _greedy = greedy_;
+ _set_greedy = true;
+ }
+
+ virtual bool greedy () const
+ {
+ return _greedy;
+ }
+
+ virtual const node_vector &followpos () const
+ {
+ return _followpos;
+ }
+
+ virtual node_vector &followpos ()
+ {
+ return _followpos;
+ }
+
+private:
+ std::size_t _token;
+ bool _set_greedy;
+ bool _greedy;
+ node_vector _followpos;
+
+ // Creates a new leaf with the same token and greedy flag, stores it in
+ // node_ptr_vector_ and pushes it on new_node_stack_.
+ virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+ node_stack &new_node_stack_, bool_stack &/*perform_op_stack_*/,
+ bool &/*down_*/) const
+ {
+ // Reserve the slot before allocating the new node.
+ node_ptr_vector_->push_back (0);
+ node_ptr_vector_->back () = new leaf_node (_token, _greedy);
+ new_node_stack_.push (node_ptr_vector_->back ());
+ }
+};
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,182 @@
+// node.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_NODE_HPP
+#define BOOST_LEXER_NODE_HPP
+
+#include <assert.h>
+#include "../../containers/ptr_vector.hpp"
+#include "../../runtime_error.hpp"
+#include "../../size_t.hpp"
+#include <stack>
+#include <vector>
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+// Abstract base class of the regex syntax tree nodes.
+// Holds the nullable flag and the firstpos/lastpos sets common to all
+// nodes. Accessors that only make sense for some node types have default
+// implementations that throw runtime_error naming the function called.
+class node
+{
+public:
+ enum type {LEAF, SEQUENCE, SELECTION, ITERATION, END};
+
+ typedef std::stack<bool> bool_stack;
+ typedef std::stack<node *> node_stack;
+ // stack and vector not owner of node pointers
+ typedef std::stack<const node *> const_node_stack;
+ typedef std::vector<node *> node_vector;
+ typedef ptr_vector<node> node_ptr_vector;
+
+ node () :
+ _nullable (false)
+ {
+ }
+
+ node (const bool nullable_) :
+ _nullable (nullable_)
+ {
+ }
+
+ virtual ~node ()
+ {
+ }
+
+ bool nullable () const
+ {
+ return _nullable;
+ }
+
+ // Appends this node's first positions to firstpos_.
+ void append_firstpos (node_vector &firstpos_) const
+ {
+ firstpos_.insert (firstpos_.end (),
+ _firstpos.begin (), _firstpos.end ());
+ }
+
+ // Appends this node's last positions to lastpos_.
+ void append_lastpos (node_vector &lastpos_) const
+ {
+ lastpos_.insert (lastpos_.end (),
+ _lastpos.begin (), _lastpos.end ());
+ }
+
+ virtual void append_followpos (const node_vector &/*followpos_*/)
+ {
+ throw runtime_error ("Internal error node::append_followpos()");
+ }
+
+ // Copies the tree rooted at this node without recursion.
+ // New nodes are stored in node_ptr_vector_; the returned pointer is the
+ // root of the copy.
+ node *copy (node_ptr_vector &node_ptr_vector_) const
+ {
+ node *new_root_ = 0;
+ const_node_stack node_stack_;
+ bool_stack perform_op_stack_;
+ bool down_ = true;
+ node_stack new_node_stack_;
+
+ node_stack_.push (this);
+
+ while (!node_stack_.empty ())
+ {
+ // Descend: traverse() pushes children and returns false once
+ // the node on top has none.
+ while (down_)
+ {
+ down_ = node_stack_.top ()->traverse (node_stack_,
+ perform_op_stack_);
+ }
+
+ // Ascend: copy nodes until one of them asks to descend again.
+ while (!down_ && !node_stack_.empty ())
+ {
+ const node *top_ = node_stack_.top ();
+
+ top_->copy_node (node_ptr_vector_, new_node_stack_, perform_op_stack_, down_);
+
+ if (!down_) node_stack_.pop ();
+ }
+ }
+
+ // Exactly the new root must be left on the stack.
+ assert (new_node_stack_.size () == 1);
+ new_root_ = new_node_stack_.top ();
+ new_node_stack_.pop ();
+ return new_root_;
+ }
+
+ virtual type what_type () const = 0;
+
+ // Pushes the children to visit (if any) on node_stack_ and returns
+ // whether there is anything to descend into.
+ virtual bool traverse (const_node_stack &node_stack_,
+ bool_stack &perform_op_stack_) const = 0;
+
+ node_vector &firstpos ()
+ {
+ return _firstpos;
+ }
+
+ const node_vector &firstpos () const
+ {
+ return _firstpos;
+ }
+
+ // _lastpos modified externally, so not const &
+ node_vector &lastpos ()
+ {
+ return _lastpos;
+ }
+
+ virtual bool end_state () const
+ {
+ return false;
+ }
+
+ virtual std::size_t id () const
+ {
+ throw runtime_error ("Internal error node::id()");
+ }
+
+ virtual std::size_t lexer_state () const
+ {
+ // Message now names this function (it used to say node::state()).
+ throw runtime_error ("Internal error node::lexer_state()");
+ }
+
+ virtual std::size_t token () const
+ {
+ throw runtime_error ("Internal error node::token()");
+ }
+
+ virtual void greedy (const bool /*greedy_*/)
+ {
+ // Message now names this function (it used to say node::token(bool)).
+ throw runtime_error ("Internal error node::greedy(bool)");
+ }
+
+ virtual bool greedy () const
+ {
+ // Message now names this function (it used to say node::token()).
+ throw runtime_error ("Internal error node::greedy()");
+ }
+
+ virtual const node_vector &followpos () const
+ {
+ throw runtime_error ("Internal error node::followpos()");
+ }
+
+ virtual node_vector &followpos ()
+ {
+ throw runtime_error ("Internal error node::followpos()");
+ }
+
+protected:
+ const bool _nullable;
+ node_vector _firstpos;
+ node_vector _lastpos;
+
+ // Per node type step of copy(): builds the copy of this node from the
+ // copies on new_node_stack_, or sets down_ to request further descent.
+ virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+ node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+ bool &down_) const = 0;
+
+private:
+ node (node const &); // No copy construction.
+ node &operator = (node const &); // No assignment.
+};
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/parser/tree/selection_node.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tree/selection_node.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,94 @@
+// selection_node.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_SELECTION_NODE_HPP
+#define BOOST_LEXER_SELECTION_NODE_HPP
+
+#include "node.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+// Binary tree node whose what_type() is SELECTION. It is nullable when
+// either child is nullable, and both children are asked to append to its
+// _firstpos and _lastpos.
+class selection_node : public node
+{
+public:
+    // left_ and right_ are the two children; this node does not own them.
+    selection_node (node *left_, node *right_) :
+        node (left_->nullable () || right_->nullable ()),
+        _left (left_),
+        _right (right_)
+    {
+        _left->append_firstpos (_firstpos);
+        _right->append_firstpos (_firstpos);
+        _left->append_lastpos (_lastpos);
+        _right->append_lastpos (_lastpos);
+    }
+
+    virtual ~selection_node ()
+    {
+    }
+
+    virtual type what_type () const
+    {
+        return SELECTION;
+    }
+
+    // Pushes true onto perform_op_stack_, followed by an extra false when
+    // the right child is itself a SEQUENCE, SELECTION or ITERATION node.
+    // Then pushes the right child and the left child onto node_stack_, so
+    // the left child ends up on top. Always returns true.
+    virtual bool traverse (const_node_stack &node_stack_,
+        bool_stack &perform_op_stack_) const
+    {
+        perform_op_stack_.push (true);
+
+        const type right_type_ = _right->what_type ();
+
+        if (right_type_ == SEQUENCE || right_type_ == SELECTION ||
+            right_type_ == ITERATION)
+        {
+            perform_op_stack_.push (false);
+        }
+
+        node_stack_.push (_right);
+        node_stack_.push (_left);
+        return true;
+    }
+
+private:
+    // Not owner of these pointers...
+    node *_left;
+    node *_right;
+
+    // Consumes the top flag of perform_op_stack_. When the flag is false,
+    // only down_ is set to true. When it is true, the top two entries of
+    // new_node_stack_ (right operand on top, left beneath it) are replaced
+    // by a new selection_node, which is stored in node_ptr_vector_.
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+        bool &down_) const
+    {
+        if (!perform_op_stack_.top ())
+        {
+            down_ = true;
+        }
+        else
+        {
+            node *right_copy_ = new_node_stack_.top ();
+
+            new_node_stack_.pop ();
+
+            node *left_copy_ = new_node_stack_.top ();
+
+            // Reserve the slot first so the new node always has a home in
+            // node_ptr_vector_ as soon as it exists.
+            node_ptr_vector_->push_back (0);
+            node_ptr_vector_->back () =
+                new selection_node (left_copy_, right_copy_);
+            new_node_stack_.top () = node_ptr_vector_->back ();
+        }
+
+        perform_op_stack_.pop ();
+    }
+};
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/parser/tree/sequence_node.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tree/sequence_node.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,112 @@
+// sequence_node.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_SEQUENCE_NODE_HPP
+#define BOOST_LEXER_SEQUENCE_NODE_HPP
+
+#include "node.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+// Binary tree node whose what_type() is SEQUENCE. It is nullable only when
+// both children are nullable.
+class sequence_node : public node
+{
+public:
+    // Builds _firstpos and _lastpos from the two children and appends the
+    // right child's firstpos to the followpos of every node in the left
+    // child's lastpos. This node does not own either child.
+    sequence_node (node *left_, node *right_) :
+        node (left_->nullable () && right_->nullable ()),
+        _left (left_),
+        _right (right_)
+    {
+        // The left child always contributes to firstpos; the right child
+        // only does so when the left child is nullable.
+        _left->append_firstpos (_firstpos);
+
+        if (_left->nullable ())
+        {
+            _right->append_firstpos (_firstpos);
+        }
+
+        // Mirror image for lastpos: the left child only contributes when
+        // the right child is nullable.
+        if (_right->nullable ())
+        {
+            _left->append_lastpos (_lastpos);
+        }
+
+        _right->append_lastpos (_lastpos);
+
+        node_vector &left_last_ = _left->lastpos ();
+        const node_vector &right_first_ = _right->firstpos ();
+        node_vector::iterator curr_ = left_last_.begin ();
+        node_vector::iterator last_ = left_last_.end ();
+
+        while (curr_ != last_)
+        {
+            (*curr_)->append_followpos (right_first_);
+            ++curr_;
+        }
+    }
+
+    virtual ~sequence_node ()
+    {
+    }
+
+    virtual type what_type () const
+    {
+        return SEQUENCE;
+    }
+
+    // Pushes true onto perform_op_stack_, followed by an extra false when
+    // the right child is itself a SEQUENCE, SELECTION or ITERATION node.
+    // Then pushes the right child and the left child onto node_stack_, so
+    // the left child ends up on top. Always returns true.
+    virtual bool traverse (const_node_stack &node_stack_,
+        bool_stack &perform_op_stack_) const
+    {
+        perform_op_stack_.push (true);
+
+        const type right_type_ = _right->what_type ();
+
+        if (right_type_ == SEQUENCE || right_type_ == SELECTION ||
+            right_type_ == ITERATION)
+        {
+            perform_op_stack_.push (false);
+        }
+
+        node_stack_.push (_right);
+        node_stack_.push (_left);
+        return true;
+    }
+
+private:
+    // Not owner of these pointers...
+    node *_left;
+    node *_right;
+
+    // Consumes the top flag of perform_op_stack_. When the flag is false,
+    // only down_ is set to true. When it is true, the top two entries of
+    // new_node_stack_ (right operand on top, left beneath it) are replaced
+    // by a new sequence_node, which is stored in node_ptr_vector_.
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+        bool &down_) const
+    {
+        if (!perform_op_stack_.top ())
+        {
+            down_ = true;
+        }
+        else
+        {
+            node *right_copy_ = new_node_stack_.top ();
+
+            new_node_stack_.pop ();
+
+            node *left_copy_ = new_node_stack_.top ();
+
+            // Reserve the slot first so the new node always has a home in
+            // node_ptr_vector_ as soon as it exists.
+            node_ptr_vector_->push_back (0);
+            node_ptr_vector_->back () =
+                new sequence_node (left_copy_, right_copy_);
+            new_node_stack_.top () = node_ptr_vector_->back ();
+        }
+
+        perform_op_stack_.pop ();
+    }
+};
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/partition/charset.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/partition/charset.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,81 @@
+// charset.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_CHARSET_HPP
+#define BOOST_LEXER_CHARSET_HPP
+
+#include <set>
+#include "../size_t.hpp"
+#include "../string_token.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+// Pairs a basic_string_token with a set of std::size_t indexes.
+template<typename CharT>
+struct basic_charset
+{
+    typedef basic_string_token<CharT> token;
+    typedef std::set<std::size_t> index_set;
+
+    token _token;
+    index_set _index_set;
+
+    // Creates a charset with a default token and no indexes.
+    basic_charset ()
+    {
+    }
+
+    // Creates a charset holding token_ and the single index index_.
+    basic_charset (const token &token_, const std::size_t index_) :
+        _token (token_)
+    {
+        _index_set.insert (index_);
+    }
+
+    // True when both the token and the index set are empty.
+    bool empty () const
+    {
+        return _token.empty () && _index_set.empty ();
+    }
+
+    // Intersects this charset's token with rhs_'s token, storing the
+    // shared part in overlap_. If there is an overlap, overlap_ receives
+    // the indexes of both operands, and any operand whose token became
+    // empty also has its index set cleared.
+    void intersect (basic_charset &rhs_, basic_charset &overlap_)
+    {
+        _token.intersect (rhs_._token, overlap_._token);
+
+        if (overlap_._token.empty ())
+        {
+            return;
+        }
+
+        add_indexes (_index_set, overlap_._index_set);
+        add_indexes (rhs_._index_set, overlap_._index_set);
+
+        if (_token.empty ())
+        {
+            _index_set.clear ();
+        }
+
+        if (rhs_._token.empty ())
+        {
+            rhs_._index_set.clear ();
+        }
+    }
+
+    // Inserts every index held in from_ into to_.
+    static void add_indexes (const index_set &from_, index_set &to_)
+    {
+        typename index_set::const_iterator curr_ = from_.begin ();
+        typename index_set::const_iterator last_ = from_.end ();
+
+        while (curr_ != last_)
+        {
+            to_.insert (*curr_);
+            ++curr_;
+        }
+    }
+};
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/partition/equivset.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/partition/equivset.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,150 @@
+// equivset.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_EQUIVSET_HPP
+#define BOOST_LEXER_EQUIVSET_HPP
+
+#include <algorithm>
+#include "../parser/tree/node.hpp"
+#include <set>
+#include "../size_t.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+// A group of indexes together with a greedy flag, an id and a list of
+// follow nodes. intersect() splits two such groups into the part they
+// share and the parts they do not.
+struct equivset
+{
+    typedef std::set<std::size_t> index_set;
+    typedef std::vector<std::size_t> index_vector;
+    // Not owner of nodes:
+    typedef std::vector<node *> node_vector;
+
+    index_vector _index_vector;
+    bool _greedy;
+    std::size_t _id;
+    node_vector _followpos;
+
+    // Creates an empty, greedy equivset with id 0.
+    equivset () :
+        _greedy (true),
+        _id (0)
+    {
+    }
+
+    // Copies the indexes of index_set_ (in the set's ascending order) and
+    // stores the given flag, id and follow nodes.
+    equivset (const index_set &index_set_, const bool greedy_,
+        const std::size_t id_, const node_vector &followpos_) :
+        _greedy (greedy_),
+        _id (id_),
+        _followpos (followpos_)
+    {
+        index_set::const_iterator iter_ = index_set_.begin ();
+        index_set::const_iterator end_ = index_set_.end ();
+
+        for (; iter_ != end_; ++iter_)
+        {
+            _index_vector.push_back (*iter_);
+        }
+    }
+
+    // True when there are neither indexes nor follow nodes.
+    bool empty () const
+    {
+        return _index_vector.empty () && _followpos.empty ();
+    }
+
+    // Moves the indexes shared by *this and rhs_ into overlap_. If any
+    // were shared, overlap_ takes this object's id and greedy flag and the
+    // union of both followpos lists (this object's nodes first). An operand
+    // left without indexes also has its followpos cleared.
+    void intersect (equivset &rhs_, equivset &overlap_)
+    {
+        intersect_indexes (rhs_._index_vector, overlap_._index_vector);
+
+        if (overlap_._index_vector.empty ())
+        {
+            return;
+        }
+
+        // The LHS takes priority for the id, the greedy flag and the order
+        // of the follow nodes, in order to respect rule ordering priority
+        // in the lex spec. (The flag used to be chosen by an if/else whose
+        // two branches both assigned _greedy.)
+        overlap_._id = _id;
+        overlap_._greedy = _greedy;
+        overlap_._followpos = _followpos;
+
+        node_vector::const_iterator rhs_iter_ = rhs_._followpos.begin ();
+        node_vector::const_iterator rhs_end_ = rhs_._followpos.end ();
+
+        for (; rhs_iter_ != rhs_end_; ++rhs_iter_)
+        {
+            node *node_ = *rhs_iter_;
+
+            // Only add RHS nodes that are not already present.
+            if (std::find (overlap_._followpos.begin (),
+                overlap_._followpos.end (), node_) ==
+                overlap_._followpos.end ())
+            {
+                overlap_._followpos.push_back (node_);
+            }
+        }
+
+        if (_index_vector.empty ())
+        {
+            _followpos.clear ();
+        }
+
+        if (rhs_._index_vector.empty ())
+        {
+            rhs_._followpos.clear ();
+        }
+    }
+
+private:
+    // Walks _index_vector and rhs_ in step, as a merge would. Every value
+    // present in both is appended to overlap_ and erased from both inputs.
+    // The walk relies on both vectors being in ascending order, which the
+    // constructor provides by copying from a std::set.
+    void intersect_indexes (index_vector &rhs_, index_vector &overlap_)
+    {
+        index_vector::iterator iter_ = _index_vector.begin ();
+        index_vector::iterator end_ = _index_vector.end ();
+        index_vector::iterator rhs_iter_ = rhs_.begin ();
+        index_vector::iterator rhs_end_ = rhs_.end ();
+
+        while (iter_ != end_ && rhs_iter_ != rhs_end_)
+        {
+            const std::size_t index_ = *iter_;
+            const std::size_t rhs_index_ = *rhs_iter_;
+
+            if (index_ < rhs_index_)
+            {
+                ++iter_;
+            }
+            else if (index_ > rhs_index_)
+            {
+                ++rhs_iter_;
+            }
+            else
+            {
+                overlap_.push_back (index_);
+                // erase() invalidates the iterators, so refresh them all.
+                iter_ = _index_vector.erase (iter_);
+                end_ = _index_vector.end ();
+                rhs_iter_ = rhs_.erase (rhs_iter_);
+                rhs_end_ = rhs_.end ();
+            }
+        }
+    }
+};
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/rules.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/rules.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,535 @@
+// rules.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_RULES_HPP
+#define BOOST_LEXER_RULES_HPP
+
+#include "consts.hpp"
+#include <deque>
+#include <locale>
+#include <map>
+#include "runtime_error.hpp"
+#include <set>
+#include "size_t.hpp"
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+ // Supplies the name of the initial state for a given character type.
+ // Only the char and wchar_t specialisations below are defined; the
+ // primary template is declared but never defined.
+ template <typename CharT>
+ struct initial;
+
+ // Narrow-character name of the initial state.
+ template <>
+ struct initial<char>
+ {
+ static const char *str ()
+ {
+ return "INITIAL";
+ }
+ };
+
+ // Wide-character name of the initial state.
+ template <>
+ struct initial<wchar_t>
+ {
+ static const wchar_t *str ()
+ {
+ return L"INITIAL";
+ }
+ };
+}
+
+template<typename CharT>
+class basic_rules
+{
+public:
+ typedef std::vector<std::size_t> id_vector;
+ typedef std::deque<id_vector> id_vector_deque;
+ typedef std::basic_string<CharT> string;
+ typedef std::deque<string> string_deque;
+ typedef std::deque<string_deque> string_deque_deque;
+ typedef std::set<string> string_set;
+ typedef std::pair<string, string> string_pair;
+ typedef std::deque<string_pair> string_pair_deque;
+ typedef std::map<string, std::size_t> string_size_t_map;
+ typedef std::pair<string, std::size_t> string_size_t_pair;
+
+ // Stores the two flags (both default to true) and registers the initial
+ // state, which therefore always receives index 0.
+ basic_rules (const bool case_sensitive_ = true,
+ const bool dot_not_newline_ = true) :
+ _case_sensitive (case_sensitive_),
+ _dot_not_newline (dot_not_newline_)
+ {
+ add_state (initial ());
+ }
+
+    // Discards every state, macro and rule, restores the flags and locale
+    // to the constructor defaults and re-registers the initial state.
+    void clear ()
+    {
+        _statemap.clear ();
+        _macrodeque.clear ();
+        _macroset.clear ();
+        _regexes.clear ();
+        _ids.clear ();
+        _states.clear ();
+        _case_sensitive = true;
+        _locale = std::locale ();
+        // Same default as the constructor argument, so a cleared object
+        // behaves like a default-constructed one.
+        _dot_not_newline = true;
+        add_state (initial ());
+    }
+
+    // Removes every rule registered for the named state. The state itself
+    // stays registered; an unknown name is silently ignored.
+    void clear (const CharT *state_name_)
+    {
+        const std::size_t index_ = state (state_name_);
+
+        if (index_ == npos)
+        {
+            return;
+        }
+
+        _regexes[index_].clear ();
+        _ids[index_].clear ();
+        _states[index_].clear ();
+    }
+
+ // Sets the stored case-sensitivity flag.
+ void case_sensitive (const bool case_sensitive_)
+ {
+ _case_sensitive = case_sensitive_;
+ }
+
+ // Returns the stored case-sensitivity flag.
+ bool case_sensitive () const
+ {
+ return _case_sensitive;
+ }
+
+    // Stores a copy of locale_. The argument is only read, so it is taken
+    // by const reference; const objects and temporaries are accepted as
+    // well as the non-const lvalues that were accepted before.
+    void locale (const std::locale &locale_)
+    {
+        _locale = locale_;
+    }
+
+    // Returns the stored locale.
+    const std::locale &locale () const
+    {
+        return _locale;
+    }
+
+ // Sets the stored dot_not_newline flag.
+ void dot_not_newline (const bool dot_not_newline_)
+ {
+ _dot_not_newline = dot_not_newline_;
+ }
+
+ // Returns the stored dot_not_newline flag.
+ bool dot_not_newline () const
+ {
+ return _dot_not_newline;
+ }
+
+    // Returns the index registered for the state called name_, or npos
+    // when no such state exists.
+    std::size_t state (const CharT *name_) const
+    {
+        typename string_size_t_map::const_iterator found_ =
+            _statemap.find (name_);
+
+        if (found_ == _statemap.end ())
+        {
+            return npos;
+        }
+
+        return found_->second;
+    }
+
+    // Registers a new state called name_, giving it the next free index
+    // and empty rule containers. Registering an existing name is a no-op.
+    // validate() throws runtime_error for an unacceptable name.
+    void add_state (const CharT *name_)
+    {
+        validate (name_, true);
+
+        const string_size_t_pair entry_ (name_, _statemap.size ());
+
+        if (!_statemap.insert (entry_).second)
+        {
+            // Name already registered: nothing to add.
+            return;
+        }
+
+        _regexes.push_back (string_deque ());
+        _ids.push_back (id_vector ());
+        _states.push_back (id_vector ());
+    }
+
+    // Defines a macro from a null-terminated regex.
+    void add_macro (const CharT *name_, const CharT *regex_)
+    {
+        const string regex_str_ (regex_);
+
+        add_macro (name_, regex_str_);
+    }
+
+    // Defines a macro from the regex in [regex_start_, regex_end_).
+    void add_macro (const CharT *name_, const CharT *regex_start_,
+        const CharT *regex_end_)
+    {
+        const string regex_str_ (regex_start_, regex_end_);
+
+        add_macro (name_, regex_str_);
+    }
+
+    // Defines the macro name_ as regex_. Throws runtime_error when the
+    // name is rejected by validate() or has already been defined.
+    void add_macro (const CharT *name_, const string &regex_)
+    {
+        validate (name_, false);
+
+        if (_macroset.find (name_) != _macroset.end ())
+        {
+            std::basic_stringstream<CharT> ss_;
+            std::ostringstream os_;
+
+            os_ << "Attempt to redefine MACRO '";
+
+            // The message is a narrow string, so narrow each character.
+            for (const CharT *ch_ = name_; *ch_; ++ch_)
+            {
+                os_ << ss_.narrow (*ch_, static_cast<CharT> (' '));
+            }
+
+            os_ << "'.";
+            throw runtime_error (os_.str ());
+        }
+
+        _macrodeque.push_back (string_pair (name_, regex_));
+        _macroset.insert (name_);
+    }
+
+ // Adds a rule to state 0 from a null-terminated regex.
+ void add (const CharT *regex_, const std::size_t id_)
+ {
+ add (string (regex_), id_);
+ }
+
+ // Adds a rule to state 0 from the regex in [regex_start_, regex_end_).
+ void add (const CharT *regex_start_, const CharT *regex_end_,
+ const std::size_t id_)
+ {
+ add (string (regex_start_, regex_end_), id_);
+ }
+
+ // Adds a rule with the given id to state 0; the rule's next state is
+ // also 0. check_for_invalid_id() throws for a reserved id.
+ void add (const string &regex_, const std::size_t id_)
+ {
+ check_for_invalid_id (id_);
+ _regexes[0].push_back (regex_);
+ _ids[0].push_back (id_);
+ _states[0].push_back (0);
+ }
+
+ // Adds an id-less rule (see the string overload below) from a
+ // null-terminated regex.
+ void add (const CharT *curr_state_, const CharT *regex_,
+ const CharT *new_state_)
+ {
+ add (curr_state_, string (regex_), new_state_);
+ }
+
+ // Adds an id-less rule from the regex in [regex_start_, regex_end_).
+ void add (const CharT *curr_state_, const CharT *regex_start_,
+ const CharT *regex_end_, const CharT *new_state_)
+ {
+ add (curr_state_, string (regex_start_, regex_end_), new_state_);
+ }
+
+ // Adds a rule to curr_state_ that moves to new_state_. The rule is
+ // stored with id 0 and the reserved-id check is switched off.
+ void add (const CharT *curr_state_, const string &regex_,
+ const CharT *new_state_)
+ {
+ add (curr_state_, regex_, 0, new_state_, false);
+ }
+
+ // Adds a rule with an id from a null-terminated regex.
+ void add (const CharT *curr_state_, const CharT *regex_,
+ const std::size_t id_, const CharT *new_state_)
+ {
+ add (curr_state_, string (regex_), id_, new_state_);
+ }
+
+ // Adds a rule with an id from the regex in [regex_start_, regex_end_).
+ void add (const CharT *curr_state_, const CharT *regex_start_,
+ const CharT *regex_end_, const std::size_t id_, const CharT *new_state_)
+ {
+ add (curr_state_, string (regex_start_, regex_end_), id_, new_state_);
+ }
+
+ // Adds a rule with the given id to curr_state_ that moves to
+ // new_state_; the id is checked against the reserved values.
+ void add (const CharT *curr_state_, const string &regex_,
+ const std::size_t id_, const CharT *new_state_)
+ {
+ add (curr_state_, regex_, id_, new_state_, true);
+ }
+
+    // Copies every rule held by rules_, whatever state it belongs to there,
+    // into curr_state_ of this object. Each copied rule keeps its id and
+    // gets curr_state_ as its next state.
+    // NOTE(review): passing *this as rules_ would append to the containers
+    // being iterated - confirm that callers never do so.
+    void add (const CharT *curr_state_, const basic_rules &rules_)
+    {
+        const string_deque_deque &regexes_ = rules_.regexes ();
+        const id_vector_deque &ids_ = rules_.ids ();
+        typename string_deque_deque::const_iterator state_regex_iter_ =
+            regexes_.begin ();
+        typename string_deque_deque::const_iterator state_regex_end_ =
+            regexes_.end ();
+        typename id_vector_deque::const_iterator state_id_iter_ =
+            ids_.begin ();
+
+        // The regex and id containers are parallel, so both per-state
+        // iterators must advance together. Without the second increment,
+        // every state after the first would be paired with the first
+        // state's ids.
+        for (; state_regex_iter_ != state_regex_end_;
+            ++state_regex_iter_, ++state_id_iter_)
+        {
+            typename string_deque::const_iterator regex_iter_ =
+                state_regex_iter_->begin ();
+            typename string_deque::const_iterator regex_end_ =
+                state_regex_iter_->end ();
+            typename id_vector::const_iterator id_iter_ =
+                state_id_iter_->begin ();
+
+            for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_)
+            {
+                add (curr_state_, *regex_iter_, *id_iter_, curr_state_);
+            }
+        }
+    }
+
+ // Read-only access to the map from state name to state index.
+ const string_size_t_map &statemap () const
+ {
+ return _statemap;
+ }
+
+ // Read-only access to the (name, regex) macro pairs, in the order they
+ // were added.
+ const string_pair_deque &macrodeque () const
+ {
+ return _macrodeque;
+ }
+
+ // Read-only access to the regexes, one deque per state index.
+ const string_deque_deque &regexes () const
+ {
+ return _regexes;
+ }
+
+ // Read-only access to the rule ids, one vector per state index, parallel
+ // to regexes().
+ const id_vector_deque &ids () const
+ {
+ return _ids;
+ }
+
+ // Read-only access to each rule's next-state index, one vector per state
+ // index, parallel to regexes().
+ const id_vector_deque &states () const
+ {
+ return _states;
+ }
+
+    // True when no state has any rule registered.
+    bool empty () const
+    {
+        typename string_deque_deque::const_iterator state_ =
+            _regexes.begin ();
+        typename string_deque_deque::const_iterator last_ = _regexes.end ();
+
+        while (state_ != last_)
+        {
+            if (!state_->empty ())
+            {
+                return false;
+            }
+
+            ++state_;
+        }
+
+        return true;
+    }
+
+ // Name of the initial state for this character type, as supplied by
+ // detail::initial<CharT>::str ().
+ static const CharT *initial ()
+ {
+ return detail::initial<CharT>::str ();
+ }
+
+private:
+ string_size_t_map _statemap;
+ string_pair_deque _macrodeque;
+ string_set _macroset;
+ string_deque_deque _regexes;
+ id_vector_deque _ids;
+ id_vector_deque _states;
+ bool _case_sensitive;
+ std::locale _locale;
+ bool _dot_not_newline;
+
+    // Shared implementation of the state-aware add() overloads.
+    //  curr_state_ - "*" for every state registered so far, otherwise a
+    //                comma-separated list of state names.
+    //  new_state_  - "." to stay in whichever state the rule is added to,
+    //                otherwise the name of a registered state.
+    //  check_      - when true, id_ is rejected if it is a reserved value.
+    // Throws runtime_error for a reserved id or an invalid or unknown
+    // state name.
+    void add (const CharT *curr_state_, const string &regex_,
+        const std::size_t id_, const CharT *new_state_, const bool check_)
+    {
+        const bool star_ = *curr_state_ == '*' && *(curr_state_ + 1) == 0;
+        const bool dot_ = *new_state_ == '.' && *(new_state_ + 1) == 0;
+
+        if (check_)
+        {
+            check_for_invalid_id (id_);
+        }
+
+        if (!dot_)
+        {
+            validate (new_state_, true);
+        }
+
+        std::size_t new_ = string::npos;
+        typename string_size_t_map::const_iterator iter_;
+        typename string_size_t_map::const_iterator end_ = _statemap.end ();
+        id_vector states_;
+
+        if (!dot_)
+        {
+            iter_ = _statemap.find (new_state_);
+
+            if (iter_ == end_)
+            {
+                std::basic_stringstream<CharT> ss_;
+                std::ostringstream os_;
+
+                os_ << "Unknown state name '";
+
+                while (*new_state_)
+                {
+                    os_ << ss_.narrow (*new_state_++, ' ');
+                }
+
+                os_ << "'.";
+                throw runtime_error (os_.str ());
+            }
+
+            new_ = iter_->second;
+        }
+
+        if (star_)
+        {
+            const std::size_t size_ = _statemap.size ();
+
+            for (std::size_t i_ = 0; i_ < size_; ++i_)
+            {
+                states_.push_back (i_);
+            }
+        }
+        else
+        {
+            const CharT *start_ = curr_state_;
+            string state_;
+
+            while (*curr_state_)
+            {
+                // Find the end of the current list entry.
+                while (*curr_state_ && *curr_state_ != ',')
+                {
+                    ++curr_state_;
+                }
+
+                state_.assign (start_, curr_state_);
+
+                // Step over the comma so the next pass starts on the next
+                // entry.
+                if (*curr_state_)
+                {
+                    ++curr_state_;
+                    start_ = curr_state_;
+                }
+
+                validate (state_.c_str (), true);
+                iter_ = _statemap.find (state_.c_str ());
+
+                if (iter_ == end_)
+                {
+                    std::basic_stringstream<CharT> ss_;
+                    std::ostringstream os_;
+
+                    os_ << "Unknown state name '";
+
+                    // Report the entry that failed the lookup. By now
+                    // curr_state_ points past it, so printing from there
+                    // would name the wrong state, or nothing at all.
+                    for (typename string::const_iterator ch_ =
+                        state_.begin (), ch_end_ = state_.end ();
+                        ch_ != ch_end_; ++ch_)
+                    {
+                        os_ << ss_.narrow (*ch_, ' ');
+                    }
+
+                    os_ << "'.";
+                    throw runtime_error (os_.str ());
+                }
+
+                states_.push_back (iter_->second);
+            }
+        }
+
+        for (std::size_t i_ = 0, size_ = states_.size (); i_ < size_; ++i_)
+        {
+            const std::size_t curr_ = states_[i_];
+
+            _regexes[curr_].push_back (regex_);
+            _ids[curr_].push_back (id_);
+            // "." means the rule leaves the lexer in the state it was
+            // added to.
+            _states[curr_].push_back (dot_ ? curr_ : new_);
+        }
+    }
+
+    // Narrows the characters in [first_, last_) for use in an exception
+    // message; a character with no narrow equivalent becomes ' '.
+    static std::string narrow_name (const CharT *first_, const CharT *last_)
+    {
+        std::basic_stringstream<CharT> ss_;
+        std::ostringstream os_;
+
+        for (; first_ != last_; ++first_)
+        {
+            os_ << ss_.narrow (*first_, ' ');
+        }
+
+        return os_.str ();
+    }
+
+    // Checks that name_ is an acceptable state or macro name: a letter or
+    // '_' followed by letters, digits, '_' or '-', and no longer than
+    // max_macro_len characters. When comma_ is true, name_ may be a
+    // comma-separated list and every entry is checked in turn.
+    // Throws runtime_error naming the offending entry.
+    void validate (const CharT *name_, const bool comma_) const
+    {
+        for (;;)
+        {
+            const CharT *start_ = name_;
+            const CharT *end_ = start_;
+
+            // Find the end of this entry: the terminator, or the next
+            // comma when lists are allowed.
+            while (*end_ && !(comma_ && *end_ == ','))
+            {
+                ++end_;
+            }
+
+            // An empty entry fails the first-character test.
+            bool valid_ = start_ != end_ && (*start_ == '_' ||
+                (*start_ >= 'A' && *start_ <= 'Z') ||
+                (*start_ >= 'a' && *start_ <= 'z'));
+
+            for (const CharT *ptr_ = start_ + 1; valid_ && ptr_ < end_;
+                ++ptr_)
+            {
+                valid_ = *ptr_ == '_' || *ptr_ == '-' ||
+                    (*ptr_ >= 'A' && *ptr_ <= 'Z') ||
+                    (*ptr_ >= 'a' && *ptr_ <= 'z') ||
+                    (*ptr_ >= '0' && *ptr_ <= '9');
+            }
+
+            if (!valid_)
+            {
+                // Quote the whole entry, not just the tail that follows
+                // the offending character.
+                throw runtime_error ("Invalid name '" +
+                    narrow_name (start_, end_) + "'.");
+            }
+
+            // The limit applies to every entry of a list, not only to the
+            // last one.
+            if (end_ - start_ > static_cast<std::ptrdiff_t>(max_macro_len))
+            {
+                throw runtime_error ("Name '" +
+                    narrow_name (start_, end_) + "' too long.");
+            }
+
+            if (!*end_)
+            {
+                break;
+            }
+
+            // Skip the comma and check the next entry. A trailing comma
+            // leaves an empty entry, which is rejected above.
+            name_ = end_ + 1;
+        }
+    }
+
+    // Throws runtime_error when id_ is one of the two reserved values:
+    // 0 (EOF) or npos (the UNKNOWN token). Any other id is accepted.
+    void check_for_invalid_id (const std::size_t id_) const
+    {
+        if (id_ == 0)
+        {
+            throw runtime_error ("id 0 is reserved for EOF.");
+        }
+
+        if (id_ == npos)
+        {
+            throw runtime_error ("id npos is reserved for the "
+                "UNKNOWN token.");
+        }
+    }
+};
+
+typedef basic_rules<char> rules;
+typedef basic_rules<wchar_t> wrules;
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/runtime_error.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/runtime_error.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,26 @@
+// runtime_error.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_RUNTIME_ERROR_HPP
+#define BOOST_LEXER_RUNTIME_ERROR_HPP
+
+#include <stdexcept>
+
+namespace boost
+{
+namespace lexer
+{
+// Exception type thrown by the lexer code. It adds nothing to
+// std::runtime_error beyond a distinct type, so handlers may catch either
+// boost::lexer::runtime_error or std::runtime_error.
+class runtime_error : public std::runtime_error
+{
+public:
+ // what_arg_ is passed to std::runtime_error unchanged.
+ runtime_error (const std::string &what_arg_) :
+ std::runtime_error (what_arg_)
+ {
+ }
+};
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/serialise.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/serialise.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,30 @@
+// examples/serialise.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_SERIALISE_HPP
+#define BOOST_LEXER_SERIALISE_HPP
+
+#include "state_machine.hpp"
+#include <boost/serialization/vector.hpp>
+
+namespace boost
+{
+namespace lexer
+{
+// Passes version_ and then the state machine's _lookup, _dfa_alphabet,
+// _dfa and the two assertion flags through ar_ using operator &, in that
+// order. _csm is not included.
+// NOTE(review): whether this saves or loads presumably depends on the
+// archive type - confirm against the Archive in use.
+// IMPORTANT! This won't work if you don't enable RTTI!
+template<typename CharT, class Archive>
+void serialise (basic_state_machine<CharT> &sm_, Archive &ar_, unsigned int version_ = 1)
+{
+ ar_ & version_;
+ ar_ & *sm_._lookup;
+ ar_ & sm_._dfa_alphabet;
+ ar_ & *sm_._dfa;
+ ar_ & sm_._seen_BOL_assertion;
+ ar_ & sm_._seen_EOL_assertion;
+}
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/size_t.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/size_t.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,21 @@
+// size_t.h
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_SIZE_T_H
+#define BOOST_LEXER_SIZE_T_H
+
+#include <stddef.h> // ptrdiff_t
+
+#if defined _MSC_VER && _MSC_VER <= 1200
+namespace std
+{
+ using ::ptrdiff_t;
+ using ::size_t;
+}
+#else
+#include <string>
+#endif
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/state_machine.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/state_machine.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,448 @@
+// state_machine.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_STATE_MACHINE_HPP
+#define BOOST_LEXER_STATE_MACHINE_HPP
+
+#include <algorithm>
+#include "conversion/char_state_machine.hpp"
+#include "consts.hpp"
+#include <deque>
+#include <map>
+#include "containers/ptr_vector.hpp"
+#include "size_t.hpp"
+#include <string>
+
+namespace boost
+{
+namespace lexer
+{
+template<typename CharT>
+class basic_state_machine
+{
+public:
+ class iterator
+ {
+ public:
+#if defined _MSC_VER && _MSC_VER <= 1200
+ friend basic_state_machine;
+#else
+ friend class basic_state_machine;
+#endif
+
+        // Value exposed by operator * and operator ->: the iterator's
+        // current position plus the details of the state and transition
+        // found there.
+        struct data
+        {
+            // Current iterator info
+            std::size_t dfa;
+            std::size_t states;
+            std::size_t state;
+            std::size_t transitions;
+            std::size_t transition;
+
+            // Current state info
+            bool end_state;
+            std::size_t id;
+            std::size_t goto_dfa;
+            std::size_t bol_index;
+            std::size_t eol_index;
+
+            // Current transition info
+            basic_string_token<CharT> token;
+            std::size_t goto_state;
+
+            // Counts start at 0, every index starts at npos and end_state
+            // starts false.
+            data () :
+                dfa (npos),
+                states (0),
+                state (npos),
+                transitions (0),
+                transition (npos),
+                end_state (false),
+                id (npos),
+                goto_dfa (npos),
+                bol_index (npos),
+                eol_index (npos),
+                goto_state (npos)
+            {
+            }
+
+            // Member-wise equality over every field, including goto_state
+            // (which was previously left out while transition was
+            // compared twice).
+            bool operator == (const data &rhs_) const
+            {
+                return dfa == rhs_.dfa &&
+                    states == rhs_.states &&
+                    state == rhs_.state &&
+                    transitions == rhs_.transitions &&
+                    transition == rhs_.transition &&
+                    end_state == rhs_.end_state &&
+                    id == rhs_.id &&
+                    goto_dfa == rhs_.goto_dfa &&
+                    bol_index == rhs_.bol_index &&
+                    eol_index == rhs_.eol_index &&
+                    token == rhs_.token &&
+                    goto_state == rhs_.goto_state;
+            }
+        };
+
+ // Creates an iterator attached to no state machine, with all counts 0
+ // and all positions npos. clear () restores the same counts and
+ // positions.
+ iterator () :
+ _sm (0),
+ _dfas (0),
+ _dfa (npos),
+ _states (0),
+ _state (npos),
+ _transitions (0),
+ _transition (npos)
+ {
+ }
+
+ // Two iterators are equal when their counts and positions match. The
+ // owning state machine (_sm), _data and the token iterators are not
+ // compared.
+ bool operator == (const iterator &rhs_) const
+ {
+ return _dfas == rhs_._dfas && _dfa == rhs_._dfa &&
+ _states == rhs_._states && _state == rhs_._state &&
+ _transitions == rhs_._transitions &&
+ _transition == rhs_._transition;
+ }
+
+ // Negation of operator ==.
+ bool operator != (const iterator &rhs_) const
+ {
+ return !(*this == rhs_);
+ }
+
+ // Returns the data describing the current position, by reference.
+ data &operator * ()
+ {
+ return _data;
+ }
+
+ // Member access to the data describing the current position.
+ data *operator -> ()
+ {
+ return &_data;
+ }
+
+ // Let compiler generate operator = ().
+
+ // prefix version: advances via next () and returns this iterator.
+ iterator &operator ++ ()
+ {
+ next ();
+ return *this;
+ }
+
+ // postfix version: advances via next () and returns a copy taken before
+ // the advance.
+ iterator operator ++ (int)
+ {
+ iterator iter_ = *this;
+
+ next ();
+ return iter_;
+ }
+
+ // Resets the counts to 0 and the positions to npos, the same values the
+ // default constructor sets. _sm, _data and the token iterators are left
+ // as they are.
+ void clear ()
+ {
+ _dfas = _states = _transitions = 0;
+ _dfa = _state = _transition = npos;
+ }
+
+ private:
+ basic_state_machine *_sm;
+ data _data;
+ std::size_t _dfas;
+ std::size_t _dfa;
+ std::size_t _states;
+ std::size_t _state;
+ std::size_t _transitions;
+ std::size_t _transition;
+ typename detail::basic_char_state_machine<CharT>::state::
+ size_t_string_token_map::const_iterator _token_iter;
+ typename detail::basic_char_state_machine<CharT>::state::
+ size_t_string_token_map::const_iterator _token_end;
+
+        // Advances to the next transition. Once every transition of the
+        // current state has been reported, it moves to the next state and,
+        // after the last state of a DFA, to the first state of the next
+        // DFA. Running off the last DFA calls clear (), which leaves the
+        // iterator comparing equal to end ().
+        void next ()
+        {
+            bool reset_state_ = false;
+
+            if (_transition >= _transitions)
+            {
+                // Current state exhausted: step to the next state.
+                _transition = _data.transition = 0;
+                _data.state = ++_state;
+                reset_state_ = true;
+
+                if (_state >= _states)
+                {
+                    ++_dfa;
+
+                    if (_dfa >= _dfas)
+                    {
+                        clear ();
+                        reset_state_ = false;
+                    }
+                    else
+                    {
+                        // Keep the exposed data in step with the private
+                        // counters, as begin () does for the first DFA.
+                        _data.dfa = _dfa;
+                        _states = _data.states =
+                            _sm->_csm._sm_vector[_dfa].size ();
+                        _state = _data.state = 0;
+                    }
+                }
+            }
+            else
+            {
+                _data.transition = _transition;
+            }
+
+            if (reset_state_)
+            {
+                // Load the per-state details of the state just entered.
+                const typename detail::basic_char_state_machine<CharT>::
+                    state *ptr_ = &_sm->_csm._sm_vector[_dfa][_state];
+
+                _transitions = _data.transitions = ptr_->_transitions.size ();
+                _data.end_state = ptr_->_end_state;
+                _data.id = ptr_->_id;
+                _data.goto_dfa = ptr_->_state;
+                _data.bol_index = ptr_->_bol_index;
+                _data.eol_index = ptr_->_eol_index;
+                _token_iter = ptr_->_transitions.begin ();
+                _token_end = ptr_->_transitions.end ();
+            }
+
+            if (_token_iter != _token_end)
+            {
+                _data.token = _token_iter->second;
+                _data.goto_state = _token_iter->first;
+                ++_token_iter;
+                ++_transition;
+            }
+            else
+            {
+                // No transition at this position (for example a state
+                // without any): report an empty token.
+                _data.token.clear ();
+                _data.goto_state = npos;
+            }
+        }
+ };
+
+#if defined _MSC_VER && _MSC_VER <= 1200
+ friend iterator;
+#else
+ friend class iterator;
+#endif
+
+ // Creates a state machine with both assertion flags false; the other
+ // members are default-constructed.
+ basic_state_machine () :
+ _seen_BOL_assertion (false),
+ _seen_EOL_assertion (false)
+ {
+ }
+
+ // Clears the tables and the cached _csm, and resets both assertion
+ // flags to false.
+ void clear ()
+ {
+ _lookup.clear ();
+ _dfa_alphabet.clear ();
+ _dfa.clear ();
+ _seen_BOL_assertion = false;
+ _seen_EOL_assertion = false;
+ _csm.clear ();
+ }
+
+ // True when _lookup, _dfa_alphabet and _dfa are all empty.
+ bool empty () const
+ {
+ // Don't include _csm in this test, as irrelevant to state.
+ return _lookup->empty () && _dfa_alphabet.empty () &&
+ _dfa->empty ();
+ }
+
+ // Number of entries in _dfa; human_readable () treats this as the
+ // number of start states.
+ std::size_t size () const
+ {
+ return _dfa->size ();
+ }
+
+ // Compares the tables and the two assertion flags.
+ // NOTE(review): _lookup and _dfa are detail::ptr_vector objects; what
+ // operator == means for them is defined elsewhere - confirm that it
+ // compares contents rather than pointers.
+ bool operator == (const basic_state_machine &rhs_) const
+ {
+ // Don't include _csm in this test, as irrelevant to state.
+ return _lookup == rhs_._lookup &&
+ _dfa_alphabet == rhs_._dfa_alphabet &&
+ _dfa == rhs_._dfa &&
+ _seen_BOL_assertion == rhs_._seen_BOL_assertion &&
+ _seen_EOL_assertion == rhs_._seen_EOL_assertion;
+ }
+
+    // Returns an iterator positioned on the first transition of the first
+    // state of the first DFA. _csm is built first if it is still empty. If
+    // it remains empty, the returned iterator equals end ().
+    iterator begin () const
+    {
+        typedef typename detail::basic_char_state_machine<CharT>::
+            state_vector csm_state_vector;
+        typedef typename detail::basic_char_state_machine<CharT>::
+            state csm_state;
+
+        iterator iter_;
+
+        iter_._sm = const_cast<basic_state_machine *>(this);
+        check_for_csm ();
+
+        if (_csm.empty())
+        {
+            return iter_;
+        }
+
+        const csm_state_vector *states_ = &_csm._sm_vector[0];
+        const csm_state &first_ = states_->front ();
+
+        // Position: DFA 0, state 0, no transition consumed yet.
+        iter_._dfas = _csm._sm_vector.size ();
+        iter_._states = iter_._data.states = states_->size ();
+        iter_._transitions = iter_._data.transitions =
+            first_._transitions.size ();
+        iter_._dfa = iter_._data.dfa = 0;
+        iter_._state = iter_._data.state = 0;
+        iter_._transition = 0;
+
+        // Details of the first state.
+        iter_._data.end_state = first_._end_state;
+        iter_._data.id = first_._id;
+        iter_._data.goto_dfa = first_._state;
+        iter_._data.bol_index = first_._bol_index;
+        iter_._data.eol_index = first_._eol_index;
+        iter_._token_iter = first_._transitions.begin ();
+        iter_._token_end = first_._transitions.end ();
+
+        // Load the first transition into the iterator's data.
+        ++iter_;
+        return iter_;
+    }
+
+ // Returns a default-constructed iterator attached to this state machine.
+ // An iterator that next () has run off the end compares equal to it.
+ iterator end () const
+ {
+ iterator iter_;
+
+ iter_._sm = const_cast<basic_state_machine *>(this);
+ return iter_;
+ }
+
+ // Exchanges the tables, the assertion flags and the cached _csm with
+ // those of sm_.
+ void swap (basic_state_machine &sm_)
+ {
+ _lookup->swap (*sm_._lookup);
+ _dfa_alphabet.swap (sm_._dfa_alphabet);
+ _dfa->swap (*sm_._dfa);
+ std::swap (_seen_BOL_assertion, sm_._seen_BOL_assertion);
+ std::swap (_seen_EOL_assertion, sm_._seen_EOL_assertion);
+ _csm.swap (sm_._csm);
+ }
+
+// VC++ 6, 7.1 and 8 can't cope with template friend classes!
+#if !(defined _MSC_VER && _MSC_VER < 1500)
+private:
+#endif
+ typedef std::vector<std::size_t> size_t_vector;
+ typedef detail::ptr_vector<size_t_vector> size_t_vector_vector;
+
+ size_t_vector_vector _lookup;
+ size_t_vector _dfa_alphabet;
+ size_t_vector_vector _dfa;
+ bool _seen_BOL_assertion;
+ bool _seen_EOL_assertion;
+ mutable detail::basic_char_state_machine<CharT> _csm;
+
+ // Builds the cached _csm on first use. It is const because _csm is
+ // declared mutable.
+ void check_for_csm () const
+ {
+ if (_csm.empty ())
+ {
+ human_readable (_csm);
+ }
+ }
+
+ // Rebuilds sm_ from the _lookup / _dfa_alphabet / _dfa tables: one state
+ // vector per start state, in which every state holds a map from target
+ // state index to the token (character set) that leads there.
+ void human_readable (detail::basic_char_state_machine<CharT> &sm_) const
+ {
+ const std::size_t max_ = sizeof (CharT) == 1 ?
+ num_chars : num_wchar_ts;
+ const std::size_t start_states_ = _dfa->size ();
+
+ sm_.clear ();
+ sm_._sm_vector.resize (start_states_);
+
+ for (std::size_t start_state_index_ = 0;
+ start_state_index_ < start_states_; ++start_state_index_)
+ {
+ const size_t_vector *lu_ = _lookup[start_state_index_];
+ // Each DFA row is dfa_offset header entries followed by alphabet_
+ // transition columns.
+ const std::size_t alphabet_ = _dfa_alphabet[start_state_index_] - dfa_offset;
+ // chars_[n] collects the characters that map to transition column n.
+ std::vector<std::basic_string<CharT> > chars_ (alphabet_);
+ const std::size_t states_ = _dfa[start_state_index_]->size () /
+ (alphabet_ + dfa_offset);
+ // Start reading at the second row; row 0 is not converted.
+ const std::size_t *read_ptr_ = &_dfa[start_state_index_]->
+ front () + alphabet_ + dfa_offset;
+
+ sm_._sm_vector[start_state_index_].resize (states_ - 1);
+
+ // Invert the lookup table: group the characters by the column they
+ // select, skipping those that map to dead_state_index.
+ for (std::size_t alpha_index_ = 0; alpha_index_ < max_;
+ ++alpha_index_)
+ {
+ const std::size_t col_ = lu_->at (alpha_index_);
+
+ if (col_ != dead_state_index)
+ {
+ chars_[col_ - dfa_offset] += static_cast<CharT>
+ (alpha_index_);
+ }
+ }
+
+ // Row state_index_ of the table becomes state state_index_ - 1 of the
+ // output.
+ for (std::size_t state_index_ = 1; state_index_ < states_;
+ ++state_index_)
+ {
+ typename detail::basic_char_state_machine<CharT>::state
+ *state_ = &sm_._sm_vector[start_state_index_]
+ [state_index_ - 1];
+
+ // Header entries of the row. bol/eol indexes are stored as index + 1.
+ state_->_end_state = *read_ptr_ != 0;
+ state_->_id = *(read_ptr_ + id_index);
+ state_->_state = *(read_ptr_ + state_index);
+ state_->_bol_index = *(read_ptr_ + bol_index) - 1;
+ state_->_eol_index = *(read_ptr_ + eol_index) - 1;
+ read_ptr_ += dfa_offset;
+
+ // Transition columns: 0 means no transition, otherwise the value is
+ // the target row, so target state index is value - 1.
+ for (std::size_t col_index_ = 0; col_index_ < alphabet_;
+ ++col_index_, ++read_ptr_)
+ {
+ const std::size_t transition_ = *read_ptr_;
+
+ if (transition_ != 0)
+ {
+ const std::size_t i_ = transition_ - 1;
+ typename detail::basic_char_state_machine<CharT>::
+ state::size_t_string_token_map::iterator iter_ =
+ state_->_transitions.find (i_);
+
+ if (iter_ == state_->_transitions.end ())
+ {
+ // First column leading to this target: start a new token.
+ basic_string_token<CharT> token_
+ (false, chars_[col_index_]);
+ typename detail::basic_char_state_machine<CharT>::
+ state::size_t_string_token_pair pair_
+ (i_, token_);
+
+ state_->_transitions.insert (pair_);
+ }
+ else
+ {
+ // Another column leading to the same target: merge its characters.
+ iter_->second._charset += chars_[col_index_];
+ }
+ }
+ }
+
+ // Sort each merged character set and then normalise the token.
+ for (typename detail::basic_char_state_machine<CharT>::state::
+ size_t_string_token_map::iterator iter_ =
+ state_->_transitions.begin (),
+ end_ = state_->_transitions.end ();
+ iter_ != end_; ++iter_)
+ {
+ std::sort (iter_->second._charset.begin (),
+ iter_->second._charset.end ());
+ iter_->second.normalise ();
+ }
+ }
+ }
+ }
+
+#if !(defined _MSC_VER && _MSC_VER < 1500)
+ template<typename ChT, typename Traits>
+ friend class basic_file_input;
+
+ template<typename ChT, typename Traits>
+ friend class basic_generator;
+
+ template<typename FwdIter, typename Traits>
+ friend class basic_input;
+
+ template<typename ChT, class Archive>
+ friend void serialise (basic_state_machine &sm_, Archive &ar_,
+ unsigned int version_);
+#endif
+};
+
+typedef basic_state_machine<char> state_machine;
+typedef basic_state_machine<wchar_t> wstate_machine;
+}
+}
+
+#endif

Added: trunk/boost/spirit/home/support/detail/lexer/string_token.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/spirit/home/support/detail/lexer/string_token.hpp 2008-07-15 17:26:49 EDT (Tue, 15 Jul 2008)
@@ -0,0 +1,336 @@
+// string_token.hpp
+// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_STRING_TOKEN_HPP
+#define BOOST_LEXER_STRING_TOKEN_HPP
+
+#include <algorithm>
+#include <limits>
+#include "size_t.hpp"
+#include "consts.hpp" // num_chars, num_wchar_ts
+#include <string>
+
+namespace boost
+{
+namespace lexer
+{
+// A set of characters, stored either directly (_negated == false: the set
+// is exactly the characters in _charset) or as a complement
+// (_negated == true: the set is every character NOT in _charset).
+//
+// Two special states fall out of that representation:
+//   empty (): _charset empty and not negated - matches nothing.
+//   any ():   _charset empty and negated     - matches everything.
+//
+// negate (), normalise () and the intersect family walk _charset in
+// ascending order, so they require it to be sorted and free of duplicates
+// (remove_duplicates () establishes that).
+template<typename CharT>
+struct basic_string_token
+{
+    typedef std::basic_string<CharT> string;
+
+    // true when _charset lists the characters that are excluded.
+    bool _negated;
+    // The listed characters (see _negated for how they are interpreted).
+    string _charset;
+
+    // Constructs the empty set.
+    basic_string_token () :
+        _negated (false)
+    {
+    }
+
+    // Constructs a token from an explicit negation flag and character list.
+    // charset_ is stored as given; it is not sorted or de-duplicated here.
+    basic_string_token (const bool negated_, const string &charset_) :
+        _negated (negated_),
+        _charset (charset_)
+    {
+    }
+
+    // Sorts _charset and removes repeated characters.
+    void remove_duplicates ()
+    {
+        if (!_charset.empty ())
+        {
+            // Optimisation for very large charsets:
+            // sorting via pointers is much quicker than
+            // via iterators...
+            // The pointer is taken from the mutable buffer (&_charset[0])
+            // rather than by casting away the const of c_str (), whose
+            // result must not be written through.
+            CharT *start_ = &_charset[0];
+
+            std::sort (start_, start_ + _charset.size ());
+        }
+
+        _charset.erase (std::unique (_charset.begin (), _charset.end ()),
+            _charset.end ());
+    }
+
+    // Switches to the shorter of the two equivalent representations:
+    // a list covering the whole alphabet becomes the empty list with the
+    // negation flag flipped, and a list covering more than half of the
+    // alphabet is replaced by its complement.
+    void normalise ()
+    {
+        const std::size_t max_chars_ = sizeof (CharT) == 1 ?
+            num_chars : num_wchar_ts;
+
+        if (_charset.length () == max_chars_)
+        {
+            _negated = !_negated;
+#if defined _MSC_VER && _MSC_VER <= 1200
+            _charset.erase ();
+#else
+            _charset.clear ();
+#endif
+        }
+        else if (_charset.length () > max_chars_ / 2)
+        {
+            negate ();
+        }
+    }
+
+    // Replaces _charset by its complement within the alphabet and flips
+    // _negated, so the token denotes the same set as before.
+    // Requires _charset to be sorted ascending without duplicates;
+    // otherwise the output buffer below is overrun.
+    void negate ()
+    {
+        const std::size_t max_chars_ = sizeof (CharT) == 1 ?
+            num_chars : num_wchar_ts;
+        // Lowest character of the alphabet. For single byte types this
+        // depends on the signedness of CharT (-128 for signed char, 0 for
+        // unsigned), so ask numeric_limits instead of assuming signed.
+        // Wide alphabets start at 0.
+        CharT curr_char_ = sizeof (CharT) == 1 ?
+            (std::numeric_limits<CharT>::min) () : CharT (0);
+        string temp_;
+        typename string::const_iterator curr_ = _charset.begin ();
+        const typename string::const_iterator chars_end_ = _charset.end ();
+
+        _negated = !_negated;
+        temp_.resize (max_chars_ - _charset.size ());
+
+        typename string::iterator out_ = temp_.begin ();
+        // Number of alphabet positions consumed so far.
+        std::size_t i_ = 0;
+
+        while (curr_ != chars_end_)
+        {
+            // Emit every character in the gap before the next listed one.
+            while (*curr_ > curr_char_)
+            {
+                *out_ = curr_char_;
+                ++out_;
+                ++curr_char_;
+                ++i_;
+            }
+
+            // Skip the listed character itself.
+            ++curr_char_;
+            ++curr_;
+            ++i_;
+        }
+
+        // Emit everything after the last listed character.
+        for (; i_ < max_chars_; ++i_)
+        {
+            *out_ = curr_char_;
+            ++out_;
+            ++curr_char_;
+        }
+
+        _charset.swap (temp_);
+    }
+
+    // Strict weak ordering: non-negated tokens sort before negated ones,
+    // ties are broken by comparing _charset.
+    bool operator < (const basic_string_token &rhs_) const
+    {
+        return _negated < rhs_._negated ||
+            (_negated == rhs_._negated && _charset < rhs_._charset);
+    }
+
+    // true if the token matches no character.
+    bool empty () const
+    {
+        return _charset.empty () && !_negated;
+    }
+
+    // true if the token matches every character.
+    bool any () const
+    {
+        return _charset.empty () && _negated;
+    }
+
+    // Resets the token to the empty set.
+    void clear ()
+    {
+        _negated = false;
+#if defined _MSC_VER && _MSC_VER <= 1200
+        _charset.erase ();
+#else
+        _charset.clear ();
+#endif
+    }
+
+    // Splits *this and rhs_ into disjoint parts: the characters both
+    // tokens match are moved into overlap_, leaving *this and rhs_ with
+    // only the characters that the other one did not match.
+    // NOTE(review): the helpers append to overlap_._charset, so overlap_
+    // is presumably expected to be empty on entry - confirm with callers.
+    void intersect (basic_string_token &rhs_, basic_string_token &overlap_)
+    {
+        // "Same types": both match everything, or both use the same
+        // representation (both plain lists or both negated lists).
+        if ((any () && rhs_.any ()) || (_negated == rhs_._negated &&
+            !any () && !rhs_.any ()))
+        {
+            intersect_same_types (rhs_, overlap_);
+        }
+        else
+        {
+            intersect_diff_types (rhs_, overlap_);
+        }
+    }
+
+private:
+    // Intersection when both tokens are any (), or both are plain lists,
+    // or both are negated lists.
+    void intersect_same_types (basic_string_token &rhs_,
+        basic_string_token &overlap_)
+    {
+        if (any ())
+        {
+            // Everything overlaps; nothing is left on either side.
+            clear ();
+            overlap_._negated = true;
+            rhs_.clear ();
+        }
+        else
+        {
+            typename string::iterator iter_ = _charset.begin ();
+            typename string::iterator end_ = _charset.end ();
+            typename string::iterator rhs_iter_ = rhs_._charset.begin ();
+            typename string::iterator rhs_end_ = rhs_._charset.end ();
+
+            overlap_._negated = _negated;
+
+            // Ordered walk over both lists: characters present in both
+            // are moved out of each list and into overlap_.
+            while (iter_ != end_ && rhs_iter_ != rhs_end_)
+            {
+                if (*iter_ < *rhs_iter_)
+                {
+                    ++iter_;
+                }
+                else if (*iter_ > *rhs_iter_)
+                {
+                    ++rhs_iter_;
+                }
+                else
+                {
+                    overlap_._charset += *iter_;
+                    iter_ = _charset.erase (iter_);
+                    end_ = _charset.end ();
+                    rhs_iter_ = rhs_._charset.erase (rhs_iter_);
+                    rhs_end_ = rhs_._charset.end ();
+                }
+            }
+
+            if (_negated)
+            {
+                // Both tokens are complements. The overlap is the
+                // complement of the union of both lists, so fold what is
+                // left of each list into overlap_.
+                // duplicates already merged, so safe to merge
+                // using std lib.
+
+                // src, dest
+                merge (_charset, overlap_._charset);
+                // src, dest
+                merge (rhs_._charset, overlap_._charset);
+                // What remains of each token is the plain list of
+                // characters that only the OTHER token excluded, hence
+                // the swap.
+                _negated = false;
+                rhs_._negated = false;
+                std::swap (_charset, rhs_._charset);
+                normalise ();
+                overlap_.normalise ();
+                rhs_.normalise ();
+            }
+            else if (!overlap_._charset.empty ())
+            {
+                normalise ();
+                overlap_.normalise ();
+                rhs_.normalise ();
+            }
+        }
+    }
+
+    // Intersection when the tokens use different representations;
+    // dispatches on the representation of *this.
+    void intersect_diff_types (basic_string_token &rhs_,
+        basic_string_token &overlap_)
+    {
+        if (any ())
+        {
+            intersect_any (rhs_, overlap_);
+        }
+        else if (_negated)
+        {
+            intersect_negated (rhs_, overlap_);
+        }
+        else // _negated == false
+        {
+            intersect_charset (rhs_, overlap_);
+        }
+    }
+
+    // *this matches everything: delegate to rhs_ with the roles swapped.
+    void intersect_any (basic_string_token &rhs_, basic_string_token &overlap_)
+    {
+        if (rhs_._negated)
+        {
+            rhs_.intersect_negated (*this, overlap_);
+        }
+        else // rhs._negated == false
+        {
+            rhs_.intersect_charset (*this, overlap_);
+        }
+    }
+
+    // *this is a negated list; rhs_ is either any () or a plain list.
+    void intersect_negated (basic_string_token &rhs_,
+        basic_string_token &overlap_)
+    {
+        if (rhs_.any ())
+        {
+            // The overlap is all of *this; rhs_ keeps exactly the
+            // characters *this excluded, and *this is left empty.
+            overlap_._negated = true;
+            overlap_._charset = _charset;
+            rhs_._negated = false;
+            rhs_._charset = _charset;
+            clear ();
+        }
+        else // rhs._negated == false
+        {
+            rhs_.intersect_charset (*this, overlap_);
+        }
+    }
+
+    // *this is a plain list; rhs_ is either any () or a negated list.
+    void intersect_charset (basic_string_token &rhs_,
+        basic_string_token &overlap_)
+    {
+        if (rhs_.any ())
+        {
+            // The overlap is all of *this; rhs_ becomes "everything
+            // except those characters", and *this is left empty.
+            overlap_._charset = _charset;
+            rhs_._negated = true;
+            rhs_._charset = _charset;
+            clear ();
+        }
+        else // rhs_._negated == true
+        {
+            typename string::iterator iter_ = _charset.begin ();
+            typename string::iterator end_ = _charset.end ();
+            typename string::iterator rhs_iter_ = rhs_._charset.begin ();
+            typename string::iterator rhs_end_ = rhs_._charset.end ();
+
+            while (iter_ != end_ && rhs_iter_ != rhs_end_)
+            {
+                if (*iter_ < *rhs_iter_)
+                {
+                    // Character is not excluded by rhs_, so both tokens
+                    // match it: move it to overlap_ and add it to the
+                    // exclusion list of rhs_.
+                    overlap_._charset += *iter_;
+                    rhs_iter_ = rhs_._charset.insert (rhs_iter_, *iter_);
+                    ++rhs_iter_;
+                    rhs_end_ = rhs_._charset.end ();
+                    iter_ = _charset.erase (iter_);
+                    end_ = _charset.end ();
+                }
+                else if (*iter_ > *rhs_iter_)
+                {
+                    ++rhs_iter_;
+                }
+                else
+                {
+                    // Character is excluded by rhs_: it stays in *this.
+                    ++iter_;
+                    ++rhs_iter_;
+                }
+            }
+
+            if (iter_ != end_)
+            {
+                // nothing bigger in rhs_ than iter_,
+                // so safe to merge using std lib.
+                string temp_ (iter_, end_);
+
+                // src, dest
+                merge (temp_, overlap_._charset);
+                _charset.erase (iter_, end_);
+            }
+
+            if (!overlap_._charset.empty ())
+            {
+                merge (overlap_._charset, rhs_._charset);
+                // possible duplicates, so check for any and erase.
+                rhs_._charset.erase (std::unique (rhs_._charset.begin (),
+                    rhs_._charset.end ()), rhs_._charset.end ());
+                normalise ();
+                overlap_.normalise ();
+                rhs_.normalise ();
+            }
+        }
+    }
+
+    // Merges the sorted string src_ into the sorted string dest_, keeping
+    // the result sorted. Characters present in both appear twice in dest_.
+    void merge (string &src_, string &dest_)
+    {
+        string tmp_ (src_.size () + dest_.size (), CharT ());
+
+        std::merge (src_.begin (), src_.end (), dest_.begin (), dest_.end (),
+            tmp_.begin ());
+        // Swap instead of assigning to avoid copying the merged buffer.
+        dest_.swap (tmp_);
+    }
+};
+}
+}
+
+#endif


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk