|
Boost-Commit : |
From: eric_at_[hidden]
Date: 2008-01-26 14:38:46
Author: eric_niebler
Date: 2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
New Revision: 42986
URL: http://svn.boost.org/trac/boost/changeset/42986
Log:
optimize repeated searches with patterns that have leading repeats
Text files modified:
trunk/boost/xpressive/detail/core/finder.hpp | 22 +++++++++++++
trunk/boost/xpressive/detail/core/linker.hpp | 26 ++++++++++++++++
trunk/boost/xpressive/detail/core/matcher/simple_repeat_matcher.hpp | 26 ++++++++++++++++
trunk/boost/xpressive/detail/core/optimize.hpp | 9 ++++
trunk/boost/xpressive/detail/core/peeker.hpp | 65 +++++++++++++++++++++------------------
trunk/boost/xpressive/detail/core/state.hpp | 2 +
trunk/boost/xpressive/detail/static/transforms/as_quantifier.hpp | 6 +-
trunk/boost/xpressive/regex_algorithms.hpp | 4 +-
trunk/boost/xpressive/regex_iterator.hpp | 9 +++-
trunk/boost/xpressive/regex_token_iterator.hpp | 12 ++++---
10 files changed, 137 insertions(+), 44 deletions(-)
Modified: trunk/boost/xpressive/detail/core/finder.hpp
==============================================================================
--- trunk/boost/xpressive/detail/core/finder.hpp (original)
+++ trunk/boost/xpressive/detail/core/finder.hpp 2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -185,6 +185,28 @@
bool bits_[256];
};
+///////////////////////////////////////////////////////////////////////////////
+// leading_simple_repeat_finder
+//
+template<typename BidiIter>
+struct leading_simple_repeat_finder
+ : finder<BidiIter>
+{
+ leading_simple_repeat_finder()
+ : finder<BidiIter>()
+ {}
+
+ bool operator ()(match_state<BidiIter> &state) const
+ {
+ state.cur_ = state.next_search_;
+ return true;
+ }
+
+private:
+ leading_simple_repeat_finder(leading_simple_repeat_finder const &);
+ leading_simple_repeat_finder &operator =(leading_simple_repeat_finder const &);
+};
+
}}}
#if defined(_MSC_VER) && (_MSC_VER >= 1020)
Modified: trunk/boost/xpressive/detail/core/linker.hpp
==============================================================================
--- trunk/boost/xpressive/detail/core/linker.hpp (original)
+++ trunk/boost/xpressive/detail/core/linker.hpp 2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -145,6 +145,7 @@
: back_stack_()
, traits_(&traits)
, traits_type_(&typeid(Traits))
+ , has_backrefs_(false)
{
}
@@ -154,6 +155,24 @@
// no-op
}
+ template<typename Traits, typename ICase>
+ void accept(mark_matcher<Traits, ICase> const &, void const *)
+ {
+ this->has_backrefs_ = true;
+ }
+
+ template<typename Action>
+ void accept(action_matcher<Action> const &, void const *)
+ {
+ this->has_backrefs_ = true;
+ }
+
+ template<typename Predicate>
+ void accept(predicate_matcher<Predicate> const &, void const *)
+ {
+ this->has_backrefs_ = true;
+ }
+
void accept(repeat_begin_matcher const &, void const *next)
{
this->back_stack_.push(next);
@@ -217,6 +236,12 @@
matcher.xpr_.link(*this);
}
+ // accessors
+ bool has_backrefs() const
+ {
+ return this->has_backrefs_;
+ }
+
// for use by alt_link_pred below
template<typename Xpr>
void alt_branch_link(Xpr const &xpr, void const *next, xpression_peeker<Char> *peeker)
@@ -292,6 +317,7 @@
std::stack<void const *> back_stack_;
void const *traits_;
std::type_info const *traits_type_;
+ bool has_backrefs_;
};
}}} // namespace boost::xpressive::detail
Modified: trunk/boost/xpressive/detail/core/matcher/simple_repeat_matcher.hpp
==============================================================================
--- trunk/boost/xpressive/detail/core/matcher/simple_repeat_matcher.hpp (original)
+++ trunk/boost/xpressive/detail/core/matcher/simple_repeat_matcher.hpp 2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -16,6 +16,7 @@
#include <boost/assert.hpp>
#include <boost/mpl/if.hpp>
#include <boost/mpl/bool.hpp>
+#include <boost/next_prior.hpp>
#include <boost/xpressive/detail/detail_fwd.hpp>
#include <boost/xpressive/detail/core/quant_style.hpp>
#include <boost/xpressive/detail/core/state.hpp>
@@ -65,12 +66,14 @@
Xpr xpr_;
unsigned int min_, max_;
std::size_t width_;
+ mutable bool leading_;
simple_repeat_matcher(Xpr const &xpr, unsigned int min, unsigned int max, std::size_t width)
: xpr_(xpr)
, min_(min)
, max_(max)
, width_(width)
+ , leading_(false)
{
// it is the job of the parser to make sure this never happens
BOOST_ASSERT(min <= max);
@@ -101,6 +104,16 @@
++matches;
}
+ // If this repeater is at the front of the pattern, note
+ // how much of the input we consumed so that a repeated search
+ // doesn't have to cover the same ground again.
+ if(this->leading_)
+ {
+ state.next_search_ = (matches && matches < this->max_)
+ ? state.cur_
+ : (tmp == state.end_) ? tmp : boost::next(tmp);
+ }
+
if(this->min_ > matches)
{
state.cur_ = tmp;
@@ -126,6 +139,7 @@
template<typename BidiIter, typename Next>
bool match_(match_state<BidiIter> &state, Next const &next, non_greedy_tag) const
{
+ BOOST_ASSERT(!this->leading_);
BidiIter const tmp = state.cur_;
unsigned int matches = 0;
@@ -161,12 +175,24 @@
// is there enough room?
if(this->min_ > diff_to_end)
{
+ if(this->leading_)
+ {
+ // BUGBUG
+ state.next_search_ = boost::next(tmp);
+ }
return false;
}
BidiIter const min_iter = tmp + this->min_;
state.cur_ += (std::min)((std::size_t)this->max_, diff_to_end);
+ if(this->leading_)
+ {
+ state.next_search_ = (diff_to_end && diff_to_end < this->max_)
+ ? state.cur_
+ : (tmp == state.end_) ? tmp : boost::next(tmp);
+ }
+
for(;; --state.cur_)
{
if(next.match(state))
Modified: trunk/boost/xpressive/detail/core/optimize.hpp
==============================================================================
--- trunk/boost/xpressive/detail/core/optimize.hpp (original)
+++ trunk/boost/xpressive/detail/core/optimize.hpp 2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -39,6 +39,13 @@
new line_start_finder<BidiIter, Traits>(traits)
);
}
+ else if(peeker.leading_simple_repeat())
+ {
+ return intrusive_ptr<finder<BidiIter> >
+ (
+ new leading_simple_repeat_finder<BidiIter>()
+ );
+ }
else if(256 != peeker.bitset().count())
{
return intrusive_ptr<finder<BidiIter> >
@@ -96,7 +103,7 @@
// "peek" into the compiled regex to see if there are optimization opportunities
hash_peek_bitset<char_type> bset;
- xpression_peeker<char_type> peeker(bset, traits);
+ xpression_peeker<char_type> peeker(bset, traits, linker.has_backrefs());
regex->peek(peeker);
// optimization: get the peek chars OR the boyer-moore search string
Modified: trunk/boost/xpressive/detail/core/peeker.hpp
==============================================================================
--- trunk/boost/xpressive/detail/core/peeker.hpp (original)
+++ trunk/boost/xpressive/detail/core/peeker.hpp 2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -31,28 +31,7 @@
{
///////////////////////////////////////////////////////////////////////////////
-// peek_next
-// tell whether or not to keep looking for a peek optimization
-template<typename Matcher>
-struct peek_next
- : mpl::bool_<Matcher::width == 0>
-{
-};
-
-template<>
-struct peek_next<mark_begin_matcher>
- : mpl::true_
-{
-};
-
-template<>
-struct peek_next<repeat_begin_matcher>
- : mpl::true_
-{
-};
-
-///////////////////////////////////////////////////////////////////////////////
-// xpression_peeker
+// peeker_string
//
template<typename Char>
struct peeker_string
@@ -91,12 +70,14 @@
struct xpression_peeker
{
template<typename Traits>
- xpression_peeker(hash_peek_bitset<Char> &bset, Traits const &traits)
+ xpression_peeker(hash_peek_bitset<Char> &bset, Traits const &traits, bool has_backrefs = false)
: bset_(bset)
, str_()
, line_start_(false)
, traits_(0)
, traits_type_(0)
+ , leading_simple_repeat_(0)
+ , has_backrefs_(has_backrefs)
{
this->set_traits(traits);
}
@@ -113,6 +94,11 @@
return this->line_start_;
}
+ bool leading_simple_repeat() const
+ {
+ return 0 < this->leading_simple_repeat_;
+ }
+
hash_peek_bitset<Char> const &bitset() const
{
return this->bset_;
@@ -120,19 +106,31 @@
///////////////////////////////////////////////////////////////////////////////
// modifiers
- void fail(bool do_fail = true)
+ void fail()
{
- if(do_fail)
+ this->bset_.set_all();
+ }
+
+ template<typename Matcher>
+ mpl::false_ accept(Matcher const &)
+ {
+ this->fail();
+ return mpl::false_();
+ }
+
+ mpl::true_ accept(mark_begin_matcher const &)
+ {
+ if(this->has_backrefs_)
{
- this->bset_.set_all();
+ --this->leading_simple_repeat_;
}
+ return mpl::true_();
}
- template<typename Matcher>
- peek_next<Matcher> accept(Matcher const &)
+ mpl::true_ accept(repeat_begin_matcher const &)
{
- this->fail(!peek_next<Matcher>::value);
- return peek_next<Matcher>();
+ --this->leading_simple_repeat_;
+ return mpl::true_();
}
template<typename Traits>
@@ -228,6 +226,11 @@
template<typename Xpr, typename Greedy>
mpl::false_ accept(simple_repeat_matcher<Xpr, Greedy> const &xpr)
{
+ if(Greedy() && 1U == xpr.width_)
+ {
+ ++this->leading_simple_repeat_;
+ xpr.leading_ = this->leading_simple_repeat();
+ }
0 != xpr.min_ ? xpr.xpr_.peek(*this) : this->fail(); // could be a union of xpr and next
return mpl::false_();
}
@@ -268,6 +271,8 @@
bool line_start_;
void const *traits_;
std::type_info const *traits_type_;
+ int leading_simple_repeat_;
+ bool has_backrefs_;
};
}}} // namespace boost::xpressive::detail
Modified: trunk/boost/xpressive/detail/core/state.hpp
==============================================================================
--- trunk/boost/xpressive/detail/core/state.hpp (original)
+++ trunk/boost/xpressive/detail/core/state.hpp 2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -127,6 +127,7 @@
actionable const **action_list_tail_;
action_args_type *action_args_;
attr_context attr_context_;
+ BidiIter next_search_;
///////////////////////////////////////////////////////////////////////////////
//
@@ -151,6 +152,7 @@
, action_list_tail_(&action_list_.next)
, action_args_(&core_access<BidiIter>::get_action_args(what))
, attr_context_() // zero-initializes the fields of attr_context_
+ , next_search_(begin)
{
// reclaim any cached memory in the match_results struct
this->extras_->sub_match_stack_.unwind();
Modified: trunk/boost/xpressive/detail/static/transforms/as_quantifier.hpp
==============================================================================
--- trunk/boost/xpressive/detail/static/transforms/as_quantifier.hpp (original)
+++ trunk/boost/xpressive/detail/static/transforms/as_quantifier.hpp 2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -88,7 +88,7 @@
typedef typename Expr::proto_tag tag;
xpr_type const &xpr = Grammar()(proto::arg(expr), detail::true_xpression(), visitor);
- matcher_type matcher(xpr, min_type<tag>(), max_type<tag>(), xpr.get_width().value());
+ matcher_type matcher(xpr, (uint_t)min_type<tag>(), (uint_t)max_type<tag>(), xpr.get_width().value());
return proto::terminal<matcher_type>::type::make(matcher);
}
};
@@ -174,8 +174,8 @@
int mark_number = proto::arg(proto::left(marked_sub)).mark_number_;
BOOST_ASSERT(0 != mark_number);
- unsigned min_ = min_type<typename Expr::proto_tag>();
- unsigned max_ = max_type<typename Expr::proto_tag>();
+ uint_t min_ = (uint_t)min_type<typename Expr::proto_tag>();
+ uint_t max_ = (uint_t)max_type<typename Expr::proto_tag>();
detail::repeat_begin_matcher begin(mark_number);
detail::repeat_end_matcher<Greedy> end(mark_number, min_, max_);
Modified: trunk/boost/xpressive/regex_algorithms.hpp
==============================================================================
--- trunk/boost/xpressive/regex_algorithms.hpp (original)
+++ trunk/boost/xpressive/regex_algorithms.hpp 2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -537,7 +537,7 @@
}
out = what.format(out, fmt, flags);
- cur = state.cur_ = what[0].second;
+ cur = state.cur_ = state.next_search_ = what[0].second;
if(0 == (flags & format_first_only))
{
@@ -552,7 +552,7 @@
access::set_prefix_suffix(what, begin, end);
out = what.format(out, fmt, flags);
- cur = state.cur_ = what[0].second;
+ cur = state.cur_ = state.next_search_ = what[0].second;
not_null = (0 == what.length());
state.reset(what, *access::get_regex_impl(re));
}
Modified: trunk/boost/xpressive/regex_iterator.hpp
==============================================================================
--- trunk/boost/xpressive/regex_iterator.hpp (original)
+++ trunk/boost/xpressive/regex_iterator.hpp 2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -39,6 +39,7 @@
BidiIter begin
, BidiIter cur
, BidiIter end
+ , BidiIter next_search
, basic_regex<BidiIter> const *rex
, regex_constants::match_flag_type flags
, bool not_null = false
@@ -50,6 +51,7 @@
, not_null_(not_null)
{
this->state_.cur_ = cur;
+ this->state_.next_search_ = next_search;
}
bool next()
@@ -63,7 +65,7 @@
// Report position() correctly by setting the base different from prefix().first
access::set_base(this->what_, this->state_.begin_);
- this->state_.cur_ = this->what_[0].second;
+ this->state_.cur_ = this->state_.next_search_ = this->what_[0].second;
this->not_null_ = (0 == this->what_.length());
return true;
@@ -116,7 +118,7 @@
, basic_regex<BidiIter> const &rex
, regex_constants::match_flag_type flags = regex_constants::match_default
)
- : impl_(new impl_type_(begin, begin, end, &rex, flags))
+ : impl_(new impl_type_(begin, begin, end, begin, &rex, flags))
{
this->next_();
}
@@ -130,7 +132,7 @@
, detail::let_<LetExpr> const &args
, regex_constants::match_flag_type flags = regex_constants::match_default
)
- : impl_(new impl_type_(begin, begin, end, &rex, flags))
+ : impl_(new impl_type_(begin, begin, end, begin, &rex, flags))
{
detail::bind_args(args, this->impl_->what_);
this->next_();
@@ -222,6 +224,7 @@
that->state_.begin_
, that->state_.cur_
, that->state_.end_
+ , that->state_.next_search_
, that->rex_
, that->flags_
, that->not_null_
Modified: trunk/boost/xpressive/regex_token_iterator.hpp
==============================================================================
--- trunk/boost/xpressive/regex_token_iterator.hpp (original)
+++ trunk/boost/xpressive/regex_token_iterator.hpp 2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -39,13 +39,14 @@
BidiIter begin
, BidiIter cur
, BidiIter end
+ , BidiIter next_search
, basic_regex<BidiIter> const *rex
, regex_constants::match_flag_type flags = regex_constants::match_default
, std::vector<int> subs = std::vector<int>(1, 0)
, int n = -2
, bool not_null = false
)
- : iter_(begin, cur, end, rex, flags, not_null)
+ : iter_(begin, cur, end, next_search, rex, flags, not_null)
, result_()
, n_((-2 == n) ? (int)subs.size() - 1 : n)
, subs_()
@@ -158,7 +159,7 @@
, BidiIter end
, basic_regex<BidiIter> const &rex
)
- : impl_(new impl_type_(begin, begin, end, &rex))
+ : impl_(new impl_type_(begin, begin, end, begin, &rex))
{
this->next_();
}
@@ -176,7 +177,7 @@
, basic_regex<BidiIter> const &rex
, detail::let_<LetExpr> const &args
)
- : impl_(new impl_type_(begin, begin, end, &rex))
+ : impl_(new impl_type_(begin, begin, end, begin, &rex))
{
detail::bind_args(args, this->impl_->iter_.what_);
this->next_();
@@ -198,7 +199,7 @@
, Subs const &subs
, regex_constants::match_flag_type flags = regex_constants::match_default
)
- : impl_(new impl_type_(begin, begin, end, &rex, flags, detail::to_vector(subs)))
+ : impl_(new impl_type_(begin, begin, end, begin, &rex, flags, detail::to_vector(subs)))
{
this->next_();
}
@@ -221,7 +222,7 @@
, detail::let_<LetExpr> const &args
, regex_constants::match_flag_type flags = regex_constants::match_default
)
- : impl_(new impl_type_(begin, begin, end, &rex, flags, detail::to_vector(subs)))
+ : impl_(new impl_type_(begin, begin, end, begin, &rex, flags, detail::to_vector(subs)))
{
detail::bind_args(args, this->impl_->iter_.what_);
this->next_();
@@ -307,6 +308,7 @@
this->impl_->iter_.state_.begin_
, this->impl_->iter_.state_.cur_
, this->impl_->iter_.state_.end_
+ , this->impl_->iter_.state_.next_search_
, this->impl_->iter_.rex_
, this->impl_->iter_.flags_
, this->impl_->subs_
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk