Boost-Commit :
Subject: [Boost-commit] svn:boost r49740 - trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser
From: jamin.hanson_at_[hidden]
Date: 2008-11-14 10:03:04
Author: ben_hanson
Date: 2008-11-14 10:03:03 EST (Fri, 14 Nov 2008)
New Revision: 49740
URL: http://svn.boost.org/trac/boost/changeset/49740
Log:
icase and dot_not_newline flags, bug fixes.
Text files modified:
trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp | 130 +++++++++++++++++++++++++++++++++++----
1 files changed, 115 insertions(+), 15 deletions(-)
Modified: trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp (original)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp 2008-11-14 10:03:03 EST (Fri, 14 Nov 2008)
@@ -1,5 +1,5 @@
// tokeniser.hpp
-// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
+// Copyright (c) 2007-2008 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -54,6 +54,12 @@
"(missing '\"').");
}
+ if (state_._paren_count)
+ {
+ throw runtime_error ("Unexpected end of regex "
+ "(missing ')').");
+ }
+
token_.set (num_token::END, null_token);
}
else
@@ -78,6 +84,7 @@
case '(':
token_.set (num_token::OPENPAREN, null_token);
++state_._paren_count;
+ read_options (state_);
break;
case ')':
--state_._paren_count;
@@ -87,11 +94,17 @@
std::ostringstream ss_;
ss_ << "Number of open parenthesis < 0 at index " <<
- state_._index - 1 << '.';
+ state_.index () - 1 << '.';
throw runtime_error (ss_.str ().c_str ());
}
token_.set (num_token::CLOSEPAREN, null_token);
+
+ if (!state_._flags_stack.empty ())
+ {
+ state_._flags = state_._flags_stack.top ();
+ state_._flags_stack.pop ();
+ }
break;
case '?':
if (!state_.eos () && *state_._curr == '?')
@@ -136,18 +149,36 @@
token_.set (num_token::OR, null_token);
break;
case '^':
- token_.set (num_token::CHARSET, bol_token);
+ if (state_._curr - 1 == state_._start)
+ {
+ token_.set (num_token::CHARSET, bol_token);
+ }
+ else
+ {
+ create_charset_token (string (1, ch_), false,
+ map_, token_);
+ }
+
state_._seen_BOL_assertion = true;
break;
case '$':
- token_.set (num_token::CHARSET, eol_token);
+ if (state_._curr == state_._end)
+ {
+ token_.set (num_token::CHARSET, eol_token);
+ }
+ else
+ {
+ create_charset_token (string (1, ch_), false,
+ map_, token_);
+ }
+
state_._seen_EOL_assertion = true;
break;
case '.':
{
string dot_;
- if (state_._dot_not_newline)
+ if (state_._flags & dot_not_newline)
{
dot_ = '\n';
}
@@ -161,7 +192,7 @@
break;
}
default:
- if (!state_._case_sensitive &&
+ if ((state_._flags & icase) &&
(std::isupper (ch_, state_._locale) ||
std::islower (ch_, state_._locale)))
{
@@ -188,6 +219,74 @@
private:
typedef basic_re_tokeniser_helper<CharT> tokeniser_helper;
+ static void read_options (state &state_)
+ {
+ if (!state_.eos () && *state_._curr == '?')
+ {
+ CharT ch_ = 0;
+ bool eos_ = false;
+ bool negate_ = false;
+
+ state_.increment ();
+ eos_ = state_.next (ch_);
+ state_._flags_stack.push (state_._flags);
+
+ while (!eos_ && ch_ != ':')
+ {
+ switch (ch_)
+ {
+ case '-':
+ negate_ ^= 1;
+ break;
+ case 'i':
+ if (negate_)
+ {
+ state_._flags = static_cast<regex_flags>
+ (state_._flags & ~icase);
+ }
+ else
+ {
+ state_._flags = static_cast<regex_flags>
+ (state_._flags | icase);
+ }
+
+ negate_ = false;
+ break;
+ case 's':
+ if (negate_)
+ {
+ state_._flags = static_cast<regex_flags>
+ (state_._flags | dot_not_newline);
+ }
+ else
+ {
+ state_._flags = static_cast<regex_flags>
+ (state_._flags & ~dot_not_newline);
+ }
+
+ negate_ = false;
+ break;
+ default:
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Unknown option at " <<
+ state_.index () - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+ }
+
+ eos_ = state_.next (ch_);
+ }
+
+ // End of string handler will handle early termination
+ }
+ else if (!state_._flags_stack.empty ())
+ {
+ state_._flags_stack.push (state_._flags);
+ }
+ }
+
static void escape (state &state_, token_map &map_, num_token &token_)
{
CharT ch_ = 0;
@@ -197,8 +296,8 @@
if (str_)
{
- state state2_ (str_ + 1, str_ + str_len_, state_._case_sensitive,
- state_._locale, state_._dot_not_newline);
+ state state2_ (str_ + 1, str_ + str_len_, state_._flags,
+ state_._locale);
charset (state2_, map_, token_);
}
@@ -328,7 +427,8 @@
{
std::ostringstream ss_;
- ss_ << "Missing '}' at index " << state_._index - 1 << '.';
+ ss_ << "Missing '}' at index " <<
+ state_.index () - 1 << '.';
throw runtime_error (ss_.str ().c_str ());
}
@@ -367,7 +467,7 @@
{
std::ostringstream ss_;
- ss_ << "Missing '}' at index " << state_._index - 1 << '.';
+ ss_ << "Missing '}' at index " << state_.index () - 1 << '.';
throw runtime_error (ss_.str ().c_str ());
}
@@ -381,7 +481,7 @@
std::ostringstream ss_;
ss_ << "Cannot have exactly zero repeats preceding index " <<
- state_._index << '.';
+ state_.index () << '.';
throw runtime_error (ss_.str ().c_str ());
}
@@ -389,8 +489,8 @@
{
std::ostringstream ss_;
- ss_ << "Max less than min preceding index " << state_._index
- << '.';
+ ss_ << "Max less than min preceding index " <<
+ state_.index () << '.';
throw runtime_error (ss_.str ().c_str ());
}
@@ -412,7 +512,7 @@
std::ostringstream ss_;
ss_ << "Invalid MACRO name at index " <<
- state_._index - 1 << '.';
+ state_.index () - 1 << '.';
throw runtime_error (ss_.str ().c_str ());
}
@@ -432,7 +532,7 @@
{
std::ostringstream ss_;
- ss_ << "Missing '}' at index " << state_._index - 1 << '.';
+ ss_ << "Missing '}' at index " << state_.index () - 1 << '.';
throw runtime_error (ss_.str ().c_str ());
}
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk