Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r64672 - in sandbox/SOC/2010/stringalgos: boost/algorithm/string boost/algorithm/string/string_search libs/algorithm/string/benchmark libs/algorithm/string/doc libs/algorithm/string/example libs/algorithm/string/test
From: mstefanro_at_[hidden]
Date: 2010-08-07 18:38:28


Author: mstefanro
Date: 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
New Revision: 64672
URL: http://svn.boost.org/trac/boost/changeset/64672

Log:
[GSoC2010][StringAlgo] Improvements on the algorithms, changes on finders
Added:
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder_aliases.hpp (contents, props changed)
Text files modified:
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/benchmark_finder.hpp | 36
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/find.hpp | 88 +
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder.hpp | 1676 ++++++++++++++++++++++-----------------
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/split.hpp | 4
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/boyer_moore.hpp | 222 ++++
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/knuth_morris_pratt.hpp | 79 +
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/naive_search.hpp | 6
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/rabin_karp.hpp | 14
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/suffix_array.hpp | 23
   sandbox/SOC/2010/stringalgos/libs/algorithm/string/benchmark/string_search.cpp | 103 ++
   sandbox/SOC/2010/stringalgos/libs/algorithm/string/doc/Jamfile.v2 | 9
   sandbox/SOC/2010/stringalgos/libs/algorithm/string/doc/usage.xml | 191 ++--
   sandbox/SOC/2010/stringalgos/libs/algorithm/string/example/finder_example.cpp | 1
   sandbox/SOC/2010/stringalgos/libs/algorithm/string/test/finder_test.cpp | 237 +++++
   14 files changed, 1731 insertions(+), 958 deletions(-)

Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/benchmark_finder.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/benchmark_finder.hpp (original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/benchmark_finder.hpp 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
@@ -44,6 +44,10 @@
 #include <boost/spirit/home/phoenix/core/argument.hpp>
 #include <boost/spirit/home/phoenix/core/reference.hpp>
 
+//!\todo pretty messy, Boost.Chrono?
+#ifdef BOOST_WINDOWS
+#include <windows.h>
+#endif
 
 #include <deque>
 #include <string>
@@ -60,9 +64,15 @@
 
 namespace boost { namespace algorithm {
 
- template <class Range1T, class Range2T, class AlgorithmSequenceT,
- class ComparatorT/*,
- template <class,class,class,class,class,class> class AdditionalBehaviorT*/>
+ //! A generic finder type which benchmarks string search algorithms.
+ /** Poseses a similar interface to \ref finder_t and allows to test
+ the performance of various string search algorithms in order to allow easier
+ choice of the right algorithm for a certain data set
+ \tparam Range1T A range representing the type of the substring (the pattern)
+ \tparam Range2T A range representing the type of the string (the text)
+ */
+ template <class Range1T, class Range2T, class AlgorithmSequenceT,
+ class ComparatorT>
     class benchmark_finder :
         public boost::algorithm::detail::finder_typedefs<Range1T, Range2T,
         ComparatorT, std::allocator<std::size_t> >
@@ -188,11 +198,25 @@
             string_range_type operator() (string_range_type correct, Finder &finder) const
             {
                 string_range_type ret;
- double time;
+ //pray for Boost.Chrono, I don't like this.
+ double elapsed;
                 try {
+# ifdef BOOST_WINDOWS
+ LARGE_INTEGER start, end, freq;
+ QueryPerformanceFrequency(&freq);
+ QueryPerformanceCounter(&start);
+# else
                     boost::timer t;
+# endif
+
                     ret = finder.first.find_next();
- time = t.elapsed();
+
+# ifdef BOOST_WINDOWS
+ QueryPerformanceCounter(&end);
+ elapsed = (double)(end.QuadPart - start.QuadPart)/freq.QuadPart;
+# else
+ elapsed = t.elapsed();
+# endif
                 } catch (std::exception const &e) { BOOST_THROW_EXCEPTION(e); }
                 bool is_correct = boost::equal(correct, ret);
                 //assert(is_correct);
@@ -211,7 +235,7 @@
 
                     BOOST_THROW_EXCEPTION( std::runtime_error(ss.str()) );
                 }
- finder.second.push_back(time);
+ finder.second.push_back(elapsed);
                 return correct;
             }
         };

Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/find.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/find.hpp (original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/find.hpp 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
@@ -29,6 +29,8 @@
     delimiting the substring.
 */
 
+//!\todo update doc here
+
 namespace boost {
     namespace algorithm {
 
@@ -45,6 +47,7 @@
                 Returned iterator is either \c RangeT::iterator or
                 \c RangeT::const_iterator, depending on the constness of
                 the input parameter.
+ \deprecated
         */
         template<typename RangeT, typename FinderT>
         inline iterator_range<
@@ -53,27 +56,41 @@
             RangeT& Input,
             const FinderT& Finder)
         {
+
             iterator_range<BOOST_STRING_TYPENAME range_iterator<RangeT>::type> lit_input(::boost::as_literal(Input));
-
             return Finder(::boost::begin(lit_input),::boost::end(lit_input));
         }
 
 // find_first -----------------------------------------------//
 
- //! Find first algorithm
+ //! Find first algorithm (not deprecated)
         /*!
             Search for the first occurrence of the substring in the input.
             
             \param Input A string which will be searched.
             \param Search A substring to be searched for.
             \return
- An \c iterator_range delimiting the match.
- Returned iterator is either \c RangeT::iterator or
- \c RangeT::const_iterator, depending on the constness of
- the input parameter.
+ An \c iterator_range delimiting the match.
+ Returned iterator is either \c RangeT::iterator or
+ \c RangeT::const_iterator, depending on the constness of
+ the input parameter.
+
+ \note This function provides the strong exception-safety guarantee
+ */
+ template<typename Range1T, typename Range2T, typename AlgorithmTagT>
+ inline iterator_range<
+ BOOST_STRING_TYPENAME range_iterator<Range1T>::type>
+ find_first(
+ Range1T& Input,
+ const Range2T& Search,
+ AlgorithmTagT const &algorithm)
+ {
+ boost::algorithm::simplified_finder_t<Range2T, Range1T, typename AlgorithmTagT::type>
+ finder(&Search, &Input);
+ return finder.find_first();
+ //return ::boost::algorithm::find(Input, ::boost::algorithm::first_finder(Search));
+ }
 
- \note This function provides the strong exception-safety guarantee
- */
         template<typename Range1T, typename Range2T>
         inline iterator_range<
             BOOST_STRING_TYPENAME range_iterator<Range1T>::type>
@@ -81,9 +98,10 @@
             Range1T& Input,
             const Range2T& Search)
         {
- return ::boost::algorithm::find(Input, ::boost::algorithm::first_finder(Search));
+ return boost::algorithm::find_first(Input, Search, boost::algorithm::default_finder_algorithm_tag());
         }
 
+
         //! Find first algorithm ( case insensitive )
         /*!
             Search for the first occurence of the substring in the input.
@@ -100,16 +118,30 @@
 
             \note This function provides the strong exception-safety guarantee
         */
- template<typename Range1T, typename Range2T>
+ template<typename Range1T, typename Range2T, typename AlgorithmTagT>
         inline iterator_range<
             BOOST_STRING_TYPENAME range_iterator<Range1T>::type>
         ifind_first(
             Range1T& Input,
             const Range2T& Search,
+ AlgorithmTagT const &,
             const std::locale& Loc=std::locale())
         {
- return ::boost::algorithm::find(Input, ::boost::algorithm::first_finder(Search,is_iequal(Loc)));
- //return ::boost::algorithm::find(Input, ::boost::algorithm::finder_t<
+ boost::algorithm::simplified_finder_t<Range2T, Range1T, typename AlgorithmTagT::type,
+ ::boost::algorithm::is_iequal> finder(&Search, &Input, ::boost::algorithm::is_iequal(Loc));
+ return finder.find_first();
+ }
+
+ template<typename Range1T, typename Range2T>
+ inline iterator_range<
+ BOOST_STRING_TYPENAME range_iterator<Range1T>::type>
+ ifind_first(
+ Range1T& Input,
+ const Range2T& Search,
+ const std::locale& Loc=std::locale())
+ {
+ return boost::algorithm::ifind_first(Input, Search,
+ boost::algorithm::default_finder_algorithm_tag(), Loc);
         }
 
 // find_last -----------------------------------------------//
@@ -128,14 +160,42 @@
 
             \note This function provides the strong exception-safety guarantee
         */
- template<typename Range1T, typename Range2T>
+ template<typename Range1T, typename Range2T, typename AlgorithmTagT>
         inline iterator_range<
             BOOST_STRING_TYPENAME range_iterator<Range1T>::type>
         find_last(
             Range1T& Input,
+ const Range2T& Search,
+ AlgorithmTagT const &)
+ {
+ //!\todo find a way to use rbegin and rend here (via boost::adaptors::reversed)
+ // if they are available (i.e. if the ranges are bidirectional at least)
+ //!\TODO IMPORTANT FIX this so it works (the final if check does not work properly)
+ simplified_finder_t<Range2T, Range1T, typename AlgorithmTagT::type>
+ finder(&Search, &Input);
+ boost::iterator_range<typename boost::range_iterator<Range1T>::type>
+ crt = make_iterator_range(boost::end(Input), boost::end(Input)),
+ prev;
+ for (;;)
+ {
+ prev=crt;
+ crt=finder.find_next();
+ if (boost::begin(crt) == boost::end(Input)) break;
+ }
+ return prev;
+
+ //return ::boost::algorithm::find(Input, ::boost::algorithm::last_finder(Search));
+ }
+
+ template<typename Range1T, typename Range2T>
+ inline iterator_range<
+ BOOST_STRING_TYPENAME range_iterator<Range1T>::type>
+ find_last(
+ Range1T& Input,
             const Range2T& Search)
         {
- return ::boost::algorithm::find(Input, ::boost::algorithm::last_finder(Search));
+ return boost::algorithm::find_last(Input, Search,
+ boost::algorithm::default_finder_algorithm_tag());
         }
 
         //! Find last algorithm ( case insensitive )

Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder.hpp (original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder.hpp 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
@@ -22,6 +22,7 @@
 #include <boost/algorithm/string/constants.hpp>
 #include <boost/algorithm/string/detail/finder.hpp>
 #include <boost/algorithm/string/compare.hpp>
+#include <boost/algorithm/string/string_search/knuth_morris_pratt.hpp>
 
 #include <boost/call_traits.hpp>
 
@@ -54,827 +55,999 @@
 
 // Finder types ---------------------------------------------//
 
- struct finder_no_additional_behavior;
- struct finder_functor_first_finder;
- struct finder_functor_last_finder;
- struct finder_functor_nth_finder;
-
- //! \todo use an allocator metafunction instead
- //! \todo derive from additionalbehaviort? use it at all?
- template <class Range1T, class Range2T, class AlgorithmT,
- class ComparatorT = ::boost::algorithm::is_equal,
- class AllocatorT = std::allocator<std::size_t>/*,
- class AdditionalBehaviorT = boost::algorithm::finder_no_additional_behavior*/
- >
- class simplified_finder_t :
- //public boost::algorithm::detail::finder_typedefs<Range1T,Range2T,ComparatorT,AllocatorT>,
- public AlgorithmT::template algorithm<
- simplified_finder_t<Range1T, Range2T, AlgorithmT, ComparatorT, AllocatorT/*, AdditionalBehaviorT*/>,
- Range1T, Range2T, ComparatorT,AllocatorT>
+ //struct finder_no_additional_behavior;
+ struct finder_behavior_first_finder;
+ struct finder_behavior_last_finder;
+ struct finder_behavior_nth_finder;
+
+ struct default_finder_algorithm_tag { typedef boost::algorithm::knuth_morris_pratt type; };
+ typedef boost::algorithm::knuth_morris_pratt default_finder_algorithm;
+
+ //! \todo use an allocator metafunction instead
+ //! \todo derive from additionalbehaviort? use it at all?
+ //! \TODO !!!!!!! IMPORTANT!!!! REMOVE THE ComparatorT template parameter and use boost::function
+ //! as a ctor parameter.
+ //! Also, revert the order of AllocatorT with AdditionalBehaviorT so it's easier to access
+ //! deprecated finders
+ //! Create template proxies for first_finder_t, last_finder_t, nth_finder_t
+ template <class Range1T, class Range2T, class AlgorithmT,
+ class ComparatorT = ::boost::algorithm::is_equal,
+ class AllocatorT = std::allocator<std::size_t>,
+ class AdditionalBehaviorT = boost::algorithm::finder_behavior_first_finder
+ >
+ class simplified_finder_t :
+ //public boost::algorithm::detail::finder_typedefs<Range1T,Range2T,ComparatorT,AllocatorT>,
+ public AlgorithmT::template algorithm<
+ simplified_finder_t<Range1T, Range2T, AlgorithmT, ComparatorT, AllocatorT, AdditionalBehaviorT>,
+ Range1T, Range2T, ComparatorT,AllocatorT>,
+ public AdditionalBehaviorT::template apply<
+ typename simplified_finder_t<Range1T, Range2T, AlgorithmT, ComparatorT, AllocatorT, AdditionalBehaviorT>,
+ Range1T, Range2T, AlgorithmT, ComparatorT, AllocatorT>
+ {
+ //! \todo Add concept assertions here.
+ private:
+ typedef typename AlgorithmT::template algorithm<simplified_finder_t, Range1T,
+ Range2T, ComparatorT, AllocatorT> algorithm_type;
+ public:
+ //!\todo remove
+ void g() {
+ //std::ofstream o1("D://o1.txt");
+ //std::ofstream o2("D://o2.txt");
+ //boost::copy(string_range_, std::ostream_iterator<char>(o1));
+ //boost::copy(substring_range_, std::ostream_iterator<char>(o2));
+ }
+ typedef typename algorithm_type::substring_type substring_type;
+ typedef typename algorithm_type::string_type string_type;
+ typedef typename algorithm_type::comparator_type comparator_type;
+ typedef typename algorithm_type::allocator_type allocator_type;
+ typedef typename algorithm_type::substring_iterator_type substring_iterator_type;
+ typedef typename algorithm_type::string_iterator_type string_iterator_type;
+ typedef typename algorithm_type::substring_char_type substring_char_type;
+ typedef typename algorithm_type::string_char_type string_char_type;
+ typedef typename algorithm_type::substring_range_type substring_range_type;
+ typedef typename algorithm_type::string_range_type string_range_type;
+ typedef typename algorithm_type::string_difference_type string_difference_type;
+
+ explicit simplified_finder_t(ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT())
+ : substring_range_(), string_range_(), substring_has_changed_(false),
+ string_has_changed_(false), comparator_(comparator), allocator_(allocator),
+ start_offset_()
+ //!\todo remove
+ //{ if(rand()==48&&rand()==49) g(); }
+ { }
+ simplified_finder_t(Range1T const *const substr, Range2T *str,
+ ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT())
+ : substring_range_(boost::as_literal(*substr)),
+ string_range_(boost::as_literal(*str)),
+ comparator_(comparator), allocator_(allocator),
+ substring_has_changed_(true), string_has_changed_(true),
+ algorithm_type()
+ { }
+ simplified_finder_t (const simplified_finder_t &other)
+ : substring_range_(other.substring_range_), string_range_(other.string_range_),
+ substring_has_changed_(other.substring_has_changed_), string_has_changed_(other.string_has_changed_),
+ comparator_(other.comparator_), allocator_(other.allocator_), start_offset_(other.start_offset_),
+ algorithm_type(other)
         {
- //! \todo Add concept assertions here.
- private:
- typedef typename AlgorithmT::template algorithm<simplified_finder_t, Range1T,
- Range2T, ComparatorT, AllocatorT> algorithm_type;
- public:
- typedef typename algorithm_type::substring_type substring_type;
- typedef typename algorithm_type::string_type string_type;
- typedef typename algorithm_type::comparator_type comparator_type;
- typedef typename algorithm_type::allocator_type allocator_type;
- typedef typename algorithm_type::substring_iterator_type substring_iterator_type;
- typedef typename algorithm_type::string_iterator_type string_iterator_type;
- typedef typename algorithm_type::substring_char_type substring_char_type;
- typedef typename algorithm_type::string_char_type string_char_type;
- typedef typename algorithm_type::substring_range_type substring_range_type;
- typedef typename algorithm_type::string_range_type string_range_type;
- typedef typename algorithm_type::string_difference_type string_difference_type;
-
- simplified_finder_t(ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT())
- : substring_range_(), string_range_(), substring_has_changed_(false),
- string_has_changed_(false), comparator_(comparator), allocator_(allocator),
- start_offset_()
- { }
- simplified_finder_t(Range1T const *const substr, Range2T *str,
- ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT())
- : substring_range_(boost::as_literal(*substr)),
- string_range_(boost::as_literal(*str)),
- comparator_(comparator), allocator_(allocator),
- substring_has_changed_(true), string_has_changed_(true),
- algorithm_type()
- { }
+ }
+ simplified_finder_t &operator=(const simplified_finder_t &rhs)
+ {
+ substring_range_ = rhs.substring_range_;
+ string_range_ = rhs.string_range_;
+ substring_has_changed_ = rhs.substring_has_changed_;
+ string_has_changed_ = rhs.string_has_changed_;
+ comparator_ = rhs.comparator_;
+ allocator_ = rhs.allocator_;
+ start_offset_ = rhs.start_offset_;
+ return *this;
+ }
 
- void set_substring (Range1T const *substr)
- { substring_range_ = boost::as_literal(*substr); substring_has_changed_ = true; }
+ void set_substring (Range1T const *substr)
+ { substring_range_ = boost::as_literal(*substr); substring_has_changed_ = true; }
 
- void set_string (Range2T *str)
- { string_range_ = boost::as_literal(*str); string_has_changed_ = true; }
+ void set_string (Range2T *str)
+ { string_range_ = boost::as_literal(*str); string_has_changed_ = true; }
 
- void find_reset ()
- { start_offset_ = boost::begin(string_range_); }
+ void find_reset ()
+ { start_offset_ = boost::begin(string_range_); }
             
- string_range_type find_first ()
- {
- find_reset();
- return find_next();
- }
+ string_range_type find_first ()
+ {
+ find_reset();
+ return find_next();
+ }
 
- //! \todo Get rid of this refresh*() idea everywhere
+ //! \todo Get rid of this refresh*() idea everywhere
 
- //!\todo Assert in case this was called after an empty ctor
- string_range_type find_next()
+ //!\todo Assert in case this was called after an empty ctor
+ string_range_type find_next()
+ {
+ apply_changes();
+ if (start_offset_ == boost::end(string_range_))
+ return string_range_type(start_offset_, start_offset_);
+ string_range_type ret =
+ algorithm_type::find(start_offset_);
+ if (boost::begin(ret) == boost::end(string_range_))
             {
- apply_changes();
- if (start_offset_ == boost::end(string_range_))
- return string_range_type(start_offset_, start_offset_);
- string_range_type ret =
- algorithm_type::find(start_offset_);
- if (boost::begin(ret) == boost::end(string_range_))
- {
- start_offset_ = boost::end(string_range_);
- return ret;
- }
- else
- {
- start_offset_ = boost::begin(ret);
- ++start_offset_;
- return ret;
- }
+ start_offset_ = boost::end(string_range_);
+ return ret;
+ }
+ else
+ {
+ start_offset_ = boost::begin(ret);
+ ++start_offset_;
+ return ret;
             }
+ }
 
- substring_range_type get_substring_range() const { return substring_range_; }
- string_range_type get_string_range() const { return string_range_; }
+ substring_range_type get_substring_range() const { return substring_range_; }
+ string_range_type get_string_range() const { return string_range_; }
             
- typename boost::call_traits<comparator_type>::reference get_comparator()
- { return comparator_; }
-
- typename boost::call_traits<comparator_type>::const_reference get_comparator() const
- { return comparator_; }
+ typename boost::call_traits<comparator_type>::const_reference get_comparator() const
+ { return comparator_; }
        
- //! Gets a reference to the current allocator
- typename boost::call_traits<allocator_type>::reference get_allocator()
- { return allocator_; }
+ //! Gets a reference to the current allocator
+ typename boost::call_traits<allocator_type>::reference get_allocator()
+ { return allocator_; }
             
- /*!
- \overload
- */
- typename boost::call_traits<allocator_type>::const_reference get_allocator() const
- { return allocator_; }
+ /*!
+ \overload
+ */
+ typename boost::call_traits<allocator_type>::const_reference get_allocator() const
+ { return allocator_; }
 
- private:
- inline void apply_changes()
- {
- if (substring_has_changed_ || string_has_changed_) {
- find_reset();
- if (substring_has_changed_) {
- on_substring_change();
- substring_has_changed_ = false;
- }
- if (string_has_changed_) {
- on_string_change();
- string_has_changed_ = false;
- }
+ private:
+ inline void apply_changes()
+ {
+ if (substring_has_changed_ || string_has_changed_) {
+ find_reset();
+ if (substring_has_changed_) {
+ on_substring_change();
+ substring_has_changed_ = false;
+ }
+ if (string_has_changed_) {
+ on_string_change();
+ string_has_changed_ = false;
                 }
             }
+ }
 
- protected:
- substring_range_type substring_range_;
- string_range_type string_range_;
- bool substring_has_changed_, string_has_changed_;
-
- comparator_type comparator_;
- allocator_type allocator_;
-
- string_iterator_type start_offset_;
- };
-
- //! \todo copyable finder types
 
- //! A generic finder type
+ protected:
+ substring_range_type substring_range_;
+ string_range_type string_range_;
+ bool substring_has_changed_, string_has_changed_;
+
+ comparator_type comparator_;
+ allocator_type allocator_;
+
+ string_iterator_type start_offset_;
+ };
+
+
+ //!\todo fix simplified_finder_t then uncomment this code. use these structs in finder generators return types
+ /*
+ template <class Range1T, class Range2T, class AlgorithmT>
+ struct first_finder_t
+ {
+ typedef boost::algorithm::simplified_finder_t<Range1T, Range2T, AlgorithmT,
+ boost::algorithm::finder_behavior_first_finder> type;
+ };
+
+ template <class Range1T, class Range2T, class AlgorithmT>
+ struct last_finder_t
+ {
+ typedef boost::algorithm::simplified_finder_t<Range1T, Range2T, AlgorithmT,
+ boost::algorithm::finder_behavior_last_finder> type;
+ };
+
+ template <class Range1T, class Range2T, class AlgorithmT>
+ struct nth_finder_t
+ {
+ typedef boost::algorithm::simplified_finder_t<Range1T, Range2T, AlgorithmT,
+ boost::algorithm::finder_behavior_nth_finder> type;
+ };
+ */
+
+ //! \todo copyable finder type below
+
+ //! A generic finder type
+ /*!
+ Allows simple use of various string searching algorithms.
+ \tparam Sequence1T The type of the substring
+ \tparam Sequence2T The type of the string
+ \tparam Algorithm An algorithm class
+ which will be used for performing the searches. \see string_search.hpp
+ \tparam Comparator Optional template parameter passed to the algorithm.
+ Used for comparing individual characters for equality.
+ \tparam Allocator Optional template parameter passed to the algorithm
+ in the event that additional computation on the data has to be stored.
+ */
+ template <
+ class Sequence1T, class Sequence2T,
+ class AlgorithmT,
+ typename ComparatorT = ::boost::algorithm::is_equal,
+ class AllocatorT = std::allocator<std::size_t>,
+ class AdditionalBehaviorT = boost::algorithm::finder_behavior_first_finder
+ >
+ class finder_t :
+ public AlgorithmT::template algorithm<
+ typename finder_t<Sequence1T, Sequence2T, AlgorithmT, ComparatorT, AllocatorT, AdditionalBehaviorT>,
+ Sequence1T, Sequence2T, ComparatorT, AllocatorT>,
+ public AdditionalBehaviorT::template apply<
+ typename finder_t<Sequence1T, Sequence2T, AlgorithmT, ComparatorT, AllocatorT, AdditionalBehaviorT>,
+ Sequence1T,Sequence2T,AlgorithmT,ComparatorT,AllocatorT>//,
+ //public boost::algorithm::detail::finder_typedefs<Sequence1T, Sequence2T, ComparatorT, AllocatorT>
+ {
+ //! \todo: Maybe write a String concept?
+ //! \todo: Currently, there's a SGI Sequence Concept implemented by Boost.ConceptCheck,
+ //! but std::string does not obey this concept, which means that even calling these template
+ //! parameters sequences is wrong.
+ BOOST_CONCEPT_ASSERT((boost::Container<Sequence1T>));
+ BOOST_CONCEPT_ASSERT((boost::Container<Sequence2T>));
+ public:
+ /*
+ //! The type of the substring
+ typedef Sequence1T substring_type;
+ //! The type of the string
+ typedef Sequence2T string_type;
+ //! The type of the comparator
+ typedef Comparator comparator_type;
+ //! The type of the allocator
+ typedef Allocator allocator_type;
+ //! The type of the substring's iterator
+ typedef typename boost::range_const_iterator<Sequence1T>::type substring_iterator_type;
+ //! The type of the string's iterator
+ typedef typename boost::range_iterator<Sequence2T>::type string_iterator_type;
+ //! The character type of the substring
+ typedef typename boost::iterator_value<substring_iterator_type>::type substring_char_type;
+ //! The character type of the string
+ typedef typename boost::iterator_value<string_iterator_type>::type string_char_type;
+ typedef typename boost::iterator_range<substring_iterator_type> substring_range_type;
+ typedef typename boost::iterator_range<string_iterator_type> string_range_type;
+ typedef typename boost::iterator_difference<string_iterator_type>::type string_difference_type;
+ */
+ //! The type of the algorithm
+ typedef typename AlgorithmT::template algorithm<finder_t,
+ Sequence1T, Sequence2T, ComparatorT, AllocatorT> algorithm_type;
+
+ typedef typename algorithm_type::substring_type substring_type;
+ typedef typename algorithm_type::string_type string_type;
+ typedef typename algorithm_type::comparator_type comparator_type;
+ typedef typename algorithm_type::allocator_type allocator_type;
+ typedef typename algorithm_type::substring_iterator_type substring_iterator_type;
+ typedef typename algorithm_type::string_iterator_type string_iterator_type;
+ typedef typename algorithm_type::substring_char_type substring_char_type;
+ typedef typename algorithm_type::string_char_type string_char_type;
+ typedef typename algorithm_type::substring_range_type substring_range_type;
+ typedef typename algorithm_type::string_range_type string_range_type;
+ typedef typename algorithm_type::string_difference_type string_difference_type;
+ //! Constructs a finder given a string and a substring
         /*!
- Allows simple use of various string searching algorithms.
- \tparam Sequence1T The type of the substring
- \tparam Sequence2T The type of the string
- \tparam Algorithm An algorithm class
- which will be used for performing the searches. \see string_search.hpp
- \tparam Comparator Optional template parameter passed to the algorithm.
- Used for comparing individual characters for equality.
- \tparam Allocator Optional template parameter passed to the algorithm
- in the event that additional computation on the data has to be stored.
- */
- template <
- class Sequence1T, class Sequence2T,
- class AlgorithmT,
- typename ComparatorT = ::boost::algorithm::is_equal,
- class AllocatorT = std::allocator<std::size_t>,
- class AdditionalBehaviorT = boost::algorithm::finder_no_additional_behavior
- >
- class finder_t :
- public AlgorithmT::template algorithm<
- typename finder_t<Sequence1T, Sequence2T, AlgorithmT, ComparatorT, AllocatorT, AdditionalBehaviorT>,
- Sequence1T, Sequence2T, ComparatorT, AllocatorT>,
- public AdditionalBehaviorT::template apply<
- typename finder_t<Sequence1T, Sequence2T, AlgorithmT, ComparatorT, AllocatorT, AdditionalBehaviorT>,
- Sequence1T,Sequence2T,AlgorithmT,ComparatorT,AllocatorT>//,
- //public boost::algorithm::detail::finder_typedefs<Sequence1T, Sequence2T, ComparatorT, AllocatorT>
- {
- //! \todo: Maybe write a String concept?
- //! \todo: Currently, there's a SGI Sequence Concept implemented by Boost.ConceptCheck,
- //! but std::string does not obey this concept, which means that even calling these template
- //! paramters sequences is wrong.
- BOOST_CONCEPT_ASSERT((boost::Container<Sequence1T>));
- BOOST_CONCEPT_ASSERT((boost::Container<Sequence2T>));
- public:
- /*
- //! The type of the substring
- typedef Sequence1T substring_type;
- //! The type of the string
- typedef Sequence2T string_type;
- //! The type of the comparator
- typedef Comparator comparator_type;
- //! The type of the allocator
- typedef Allocator allocator_type;
- //! The type of the substring's iterator
- typedef typename boost::range_const_iterator<Sequence1T>::type substring_iterator_type;
- //! The type of the string's iterator
- typedef typename boost::range_iterator<Sequence2T>::type string_iterator_type;
- //! The character type of the substring
- typedef typename boost::iterator_value<substring_iterator_type>::type substring_char_type;
- //! The character type of the string
- typedef typename boost::iterator_value<string_iterator_type>::type string_char_type;
- typedef typename boost::iterator_range<substring_iterator_type> substring_range_type;
- typedef typename boost::iterator_range<string_iterator_type> string_range_type;
- typedef typename boost::iterator_difference<string_iterator_type>::type string_difference_type;
+ \param substring Either a range (or character array)
+ of the substring to be searched, or a pointer to a sequence of type \c substring_type.
+ \param string Either a range (or character array)
+ of the string to be searched, or a pointer to a sequence of type \c string_type.
+ \param comparator A comparator object passed on to the algorithm
+ \param allocator An allocator object passed on to the algorithm
+ \note Both the substring and string can be passed either as references of ranges,
+ or as pointers to sequences. In the former case, the substring and/or string is copied
+ inside the class. In the latter class, the pointer is used and no copy is made.
+ \warning Whereas the overloads taking pointers are faster (because no strings are copied around),
+ you have to guarantee that the lifetime of your pointee is at least as long as the lifetime
+ of the finder. If you cannot guarantee on the lifetime, use a reference instead, which will
+ force a copy.
+ \note If a rvalue reference is passed as the string or substring, and your compiler supports rvalue
+ references, then a move is performed as opposed to a copy.
             */
- //! The type of the algorithm
- typedef typename AlgorithmT::template algorithm<finder_t,
- Sequence1T, Sequence2T, ComparatorT, AllocatorT> algorithm_type;
-
- typedef typename algorithm_type::substring_type substring_type;
- typedef typename algorithm_type::string_type string_type;
- typedef typename algorithm_type::comparator_type comparator_type;
- typedef typename algorithm_type::allocator_type allocator_type;
- typedef typename algorithm_type::substring_iterator_type substring_iterator_type;
- typedef typename algorithm_type::string_iterator_type string_iterator_type;
- typedef typename algorithm_type::substring_char_type substring_char_type;
- typedef typename algorithm_type::string_char_type string_char_type;
- typedef typename algorithm_type::substring_range_type substring_range_type;
- typedef typename algorithm_type::string_range_type string_range_type;
- typedef typename algorithm_type::string_difference_type string_difference_type;
- //! Constructs a finder given a string and a substring
- /*!
- \param substring Either a range (or character array)
- of the substring to be searched, or a pointer to a sequence of type \c substring_type.
- \param string Either a range (or character array)
- of the string to be searched, or a pointer to a sequence of type \c string_type.
- \param comparator A comparator object passed on to the algorithm
- \param allocator An allocator object passed on to the algorithm
- \note Both the substring and string can be passed either as references of ranges,
- or as pointers to sequences. In the former case, the substring and/or string is copied
- inside the class. In the latter class, the pointer is used and no copy is made.
- \warning Whereas the overloads taking pointers are faster (because no strings are copied around),
- you have to guarantee that the lifetime of your pointee is at least as long as the lifetime
- of the finder. If you cannot guarantee on the lifetime, use a reference instead, which will
- force a copy.
- \note If a rvalue reference is passed as the string or substring, and your compiler supports rvalue
- references, then a move is performed as opposed to a copy.
- */
- explicit finder_t (const Sequence1T *const substring = 0, Sequence2T *const string = 0,
- ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT())
- : comparator_(comparator), allocator_(allocator),
- substring_optional_copy_(), substring_range_(substring?*substring:substring_optional_copy_),
- string_optional_copy_(), string_range_(string?*string:string_optional_copy_),
- start_offset_(boost::begin(string_range_)),
- algorithm_type(),
- substring_has_changed_(true), string_has_changed_(true)
- { }
-
- //! \overload
- template <class Range2T>
- finder_t (const Sequence1T *const substring, const Range2T &string,
- ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT(),
- typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<Range2T,Sequence2T> >::type* = 0)
- : comparator_(comparator), allocator_(allocator),
- substring_optional_copy_(), substring_range_(substring?*substring:substring_optional_copy_),
- string_optional_copy_(), string_range_(),
- start_offset_(),
- algorithm_type(),
- substring_has_changed_(true), string_has_changed_(true)
- {
- set_string(string);
- //on_substring_change();
- }
+ explicit finder_t (const Sequence1T *const substring = 0, Sequence2T *const string = 0,
+ ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT())
+ : comparator_(comparator), allocator_(allocator),
+ substring_optional_copy_(), substring_range_(substring?*substring:substring_optional_copy_),
+ string_optional_copy_(), string_range_(string?*string:string_optional_copy_),
+ start_offset_(boost::begin(string_range_)),
+ algorithm_type(),
+ substring_has_changed_(true), string_has_changed_(true)
+ { }
+
+ //! \overload
+ template <class Range2T>
+ finder_t (const Sequence1T *const substring, const Range2T &string,
+ ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT(),
+ typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<Range2T,Sequence2T> >::type* = 0)
+ : comparator_(comparator), allocator_(allocator),
+ substring_optional_copy_(), substring_range_(substring?*substring:substring_optional_copy_),
+ string_optional_copy_(), string_range_(),
+ start_offset_(),
+ algorithm_type(),
+ substring_has_changed_(true), string_has_changed_(true)
+ {
+ set_string(string);
+ //on_substring_change();
+ }
 
- //! \overload
- template <class Range1T>
- explicit finder_t (const Range1T &substring, Sequence2T *const string = 0,
- ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT(),
- typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<Range1T,Sequence1T> >::type* = 0)
- : comparator_(comparator), allocator_(allocator),
- substring_optional_copy_(), substring_range_(),
- string_optional_copy_(), string_range_(string?boost::as_literal(*string):string_optional_copy_),
- start_offset_(boost::begin(string_range_)),
- algorithm_type(),
- substring_has_changed_(true), string_has_changed_(true)
- {
- set_substring(substring);
- //on_string_change();
- }
+ //! \overload
+ template <class Range1T>
+ explicit finder_t (const Range1T &substring, Sequence2T *const string = 0,
+ ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT(),
+ typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<Range1T,Sequence1T> >::type* = 0)
+ : comparator_(comparator), allocator_(allocator),
+ substring_optional_copy_(), substring_range_(),
+ string_optional_copy_(), string_range_(string?boost::as_literal(*string):string_optional_copy_),
+ start_offset_(boost::begin(string_range_)),
+ algorithm_type(),
+ substring_has_changed_(true), string_has_changed_(true)
+ {
+ set_substring(substring);
+ //on_string_change();
+ }
 
- //! \overload
- template <class Range1T, class Range2T>
- finder_t (const Range1T &substring, const Range2T &string,
- ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT(),
- typename boost::disable_if<boost::mpl::or_<typename ::boost::algorithm::detail::is_pointer_to<Range1T,Sequence1T>,
- typename ::boost::algorithm::detail::is_pointer_to<Range2T,Sequence2T> > >::type* = 0)
- : comparator_(comparator), allocator_(allocator),
- substring_optional_copy_(), substring_range_(),
- string_optional_copy_(), string_range_(),
- start_offset_(),
- algorithm_type(),
- substring_has_changed_(true), string_has_changed_(true)
- {
- set_substring(substring);
- set_string(string);
- }
+ //! \overload
+ template <class Range1T, class Range2T>
+ finder_t (const Range1T &substring, const Range2T &string,
+ ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT(),
+ typename boost::disable_if<boost::mpl::or_<typename ::boost::algorithm::detail::is_pointer_to<Range1T,Sequence1T>,
+ typename ::boost::algorithm::detail::is_pointer_to<Range2T,Sequence2T> > >::type* = 0)
+ : comparator_(comparator), allocator_(allocator),
+ substring_optional_copy_(), substring_range_(),
+ string_optional_copy_(), string_range_(),
+ start_offset_(),
+ algorithm_type(),
+ substring_has_changed_(true), string_has_changed_(true)
+ {
+ set_substring(substring);
+ set_string(string);
+ }
 
 # ifdef BOOST_HAS_RVALUE_REFS
- //! \overload
- template <class Range2T>
- explicit finder_t (
- Sequence1T const &&substring,
- Sequence2T *const string = 0,
- ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT())
- : comparator_(comparator), allocator_(allocator),
- substring_optional_copy_(std::move(substring)), string_optional_copy_(),
- substring_range_(substring_optional_copy_), string_range_(string?*string:string_optional_copy_),
- start_offset_(boost::begin(string_range_)),
- algorithm_type(),
- substring_has_changed_(true), string_has_changed_(true)
- { /*on_substring_change(); on_string_change();*/ }
-
- //! \overload
- template <class Range2T>
- explicit finder_t (
- Sequence1T const &&substring,
- const Range2T &string,
- ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT(),
- typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<Range2T,Sequence2T> >::type* = 0)
- : comparator_(comparator), allocator_(allocator),
- substring_optional_copy_(std::move(substring)), substring_range_(substring_optional_copy_),
- string_optional_copy_(), string_range_(),
- start_offset_(),
- algorithm_type(),
- substring_has_changed_(true), string_has_changed_(true)
- { set_string(string); /*on_substring_change();*/ }
-
- //! \overload
- finder_t (
- Sequence1T const &&substring,
- Sequence2T &&string,
- ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT())
- : comparator_(comparator), allocator_(allocator),
- substring_optional_copy_(std::move(substring)), string_optional_copy_(std::move(string)),
- substring_range_(substring_optional_copy_), string_range_(string_optional_copy_),
- start_offset_(boost::begin(string_range_)),
- algorithm_type(),
- substring_has_changed_(true), string_has_changed_(true)
- { /*on_substring_change(); on_string_change();*/ }
-
- //! \overload
- finder_t (const Sequence1T *const substring,
- Sequence2T &&string,
- ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT())
- : comparator_(comparator), allocator_(allocator),
- substring_optional_copy_(), string_optional_copy_(std::move(string)),
- substring_range_(substring?*substring:substring_optional_copy_), string_range_(string_optional_copy_),
- start_offset_(boost::begin(string_range_)),
- algorithm_type(),
- substring_has_changed_(true), string_has_changed_(true)
- { /*on_substring_change(); on_string_change();*/ }
+ //! \overload
+ template <class Range2T>
+ explicit finder_t (
+ Sequence1T const &&substring,
+ Sequence2T *const string = 0,
+ ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT())
+ : comparator_(comparator), allocator_(allocator),
+ substring_optional_copy_(std::move(substring)), string_optional_copy_(),
+ substring_range_(substring_optional_copy_), string_range_(string?*string:string_optional_copy_),
+ start_offset_(boost::begin(string_range_)),
+ algorithm_type(),
+ substring_has_changed_(true), string_has_changed_(true)
+ { /*on_substring_change(); on_string_change();*/ }
+
+ //! \overload
+ template <class Range2T>
+ explicit finder_t (
+ Sequence1T const &&substring,
+ const Range2T &string,
+ ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT(),
+ typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<Range2T,Sequence2T> >::type* = 0)
+ : comparator_(comparator), allocator_(allocator),
+ substring_optional_copy_(std::move(substring)), substring_range_(substring_optional_copy_),
+ string_optional_copy_(), string_range_(),
+ start_offset_(),
+ algorithm_type(),
+ substring_has_changed_(true), string_has_changed_(true)
+ { set_string(string); /*on_substring_change();*/ }
+
+ //! \overload
+ finder_t (
+ Sequence1T const &&substring,
+ Sequence2T &&string,
+ ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT())
+ : comparator_(comparator), allocator_(allocator),
+ substring_optional_copy_(std::move(substring)), string_optional_copy_(std::move(string)),
+ substring_range_(substring_optional_copy_), string_range_(string_optional_copy_),
+ start_offset_(boost::begin(string_range_)),
+ algorithm_type(),
+ substring_has_changed_(true), string_has_changed_(true)
+ { /*on_substring_change(); on_string_change();*/ }
+
+ //! \overload
+ finder_t (const Sequence1T *const substring,
+ Sequence2T &&string,
+ ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT())
+ : comparator_(comparator), allocator_(allocator),
+ substring_optional_copy_(), string_optional_copy_(std::move(string)),
+ substring_range_(substring?*substring:substring_optional_copy_), string_range_(string_optional_copy_),
+ start_offset_(boost::begin(string_range_)),
+ algorithm_type(),
+ substring_has_changed_(true), string_has_changed_(true)
+ { /*on_substring_change(); on_string_change();*/ }
             
- //! \overload
- template <class Range1T>
- finder_t (const Range1T &substring,
- Sequence2T &&string,
- ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT(),
- typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<Range1T,Sequence1T> >::type* = 0)
- : comparator_(comparator), allocator_(allocator),
- substring_optional_copy_(), substring_range_(),
- string_optional_copy_(std::move(string)), string_range_(string_optional_copy_),
- start_offset_(boost::begin(string_range_)),
- algorithm_type(),
- substring_has_changed_(true), string_has_changed_(true)
- { set_substring(substring); /*on_string_change();*/ }
+ //! \overload
+ template <class Range1T>
+ finder_t (const Range1T &substring,
+ Sequence2T &&string,
+ ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT(),
+ typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<Range1T,Sequence1T> >::type* = 0)
+ : comparator_(comparator), allocator_(allocator),
+ substring_optional_copy_(), substring_range_(),
+ string_optional_copy_(std::move(string)), string_range_(string_optional_copy_),
+ start_offset_(boost::begin(string_range_)),
+ algorithm_type(),
+ substring_has_changed_(true), string_has_changed_(true)
+ { set_substring(substring); /*on_string_change();*/ }
 
 # endif
 
 
- //! Get an iterator range of the currently stored substring
- /*!
- \return An iterator range of the currently stored substring
- */
- //! \todo This may return iterators for copies of the string, properly deal with it.
- typename substring_range_type get_substring_range() const
- { return substring_range_; }
-
- //! Get an iterator range of the currently stored string
- /*!
- \return An iterator range of the currently stored string
- */
- typename string_range_type get_string_range() const
- { return string_range_; }
-
- //! Gets a reference to an instance of the comparator in use
- typename boost::call_traits<comparator_type>::reference get_comparator()
- { return comparator_; }
+ //! Get an iterator range of the currently stored substring
+ /*!
+ \return An iterator range of the currently stored substring
+ */
+ //! \todo This may return iterators for copies of the string, properly deal with it.
+ typename substring_range_type get_substring_range() const
+ { return substring_range_; }
+
+ //! Get an iterator range of the currently stored string
+ /*!
+ \return An iterator range of the currently stored string
+ */
+ typename string_range_type get_string_range() const
+ { return string_range_; }
 
- typename boost::call_traits<comparator_type>::const_reference get_comparator() const
- { return comparator_; }
+ //! Gets a reference to an instance of the comparator in use
+ typename boost::call_traits<comparator_type>::const_reference get_comparator() const
+ { return comparator_; }
 
- //! \todo: any reason why stdlib get_allocator()s return value types?
+ //! \todo: any reason why stdlib get_allocator()s return value types?
 
            
- //! Gets a reference to the current allocator
- typename boost::call_traits<allocator_type>::reference get_allocator()
- { return allocator_; }
+ //! Gets a reference to the current allocator
+ typename boost::call_traits<allocator_type>::reference get_allocator()
+ { return allocator_; }
             
- /*!
- \overload
- */
- typename boost::call_traits<allocator_type>::const_reference get_allocator() const
- { return allocator_; }
+ /*!
+ \overload
+ */
+ typename boost::call_traits<allocator_type>::const_reference get_allocator() const
+ { return allocator_; }
 
- //! Changes the substring to be searched for.
- /*!
- \param substring Either a range (or character array) corresponding to the new substring,
- or a pointer to a sequence of type \c substring_type
- */
- template <typename RangeT>
- void set_substring(RangeT const &substring,
- typename boost::disable_if<
- typename ::boost::algorithm::detail::is_pointer_to<RangeT,Sequence1T> >::type* = 0)
- {
- boost::iterator_range<typename boost::range_const_iterator<RangeT>::type> substring_range =
- boost::as_literal(substring);
- //substring_optional_copy_.assign(boost::begin(substring_range), boost::end(substring_range));
- substring_optional_copy_.clear();
- substring_optional_copy_.insert(substring_optional_copy_.end(),
- boost::begin(substring_range), boost::end(substring_range));
- substring_range_ = substring_optional_copy_;
- substring_has_changed_ = true;
- }
+ //! Changes the substring to be searched for.
+ /*!
+ \param substring Either a range (or character array) corresponding to the new substring,
+ or a pointer to a sequence of type \c substring_type
+ */
+ template <typename RangeT>
+ void set_substring(RangeT const &substring,
+ typename boost::disable_if<
+ typename ::boost::algorithm::detail::is_pointer_to<RangeT,Sequence1T> >::type* = 0)
+ {
+ boost::iterator_range<typename boost::range_const_iterator<RangeT>::type> substring_range =
+ boost::as_literal(substring);
+ //substring_optional_copy_.assign(boost::begin(substring_range), boost::end(substring_range));
+ substring_optional_copy_.clear();
+ substring_optional_copy_.insert(substring_optional_copy_.end(),
+ boost::begin(substring_range), boost::end(substring_range));
+ substring_range_ = substring_optional_copy_;
+ substring_has_changed_ = true;
+ }
             
- void set_substring (Sequence1T const *const substring = 0)
- {
- substring_optional_copy_.clear();
- if (substring)
- substring_range_ = *substring;
- else
- substring_range_ = substring_optional_copy_;
- substring_has_changed_ = true;
- }
+ void set_substring (Sequence1T const *const substring = 0)
+ {
+ substring_optional_copy_.clear();
+ if (substring)
+ substring_range_ = *substring;
+ else
+ substring_range_ = substring_optional_copy_;
+ substring_has_changed_ = true;
+ }
 
 # ifdef BOOST_HAS_RVALUE_REFS
- void set_substring (
- Sequence1T const &&substring)
- {
- substring_optional_copy_ = std::move(substring);
- substring_range_ = substring_optional_copy_;
- substring_has_changed_ = true;
- }
+ void set_substring (
+ Sequence1T const &&substring)
+ {
+ substring_optional_copy_ = std::move(substring);
+ substring_range_ = substring_optional_copy_;
+ substring_has_changed_ = true;
+ }
 # endif
 
- //! Changes the string to be searched for.
- /*!
- \param string Either a range (or character array) corresponding to the new substring,
- or a pointer to a sequence of type \c string_type
- */
- template <typename RangeT>
- void set_string(RangeT const &string,
- typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<RangeT,Sequence2T> >::type* = 0)
- {
- boost::iterator_range<typename boost::range_const_iterator<RangeT>::type> string_range =
- boost::as_literal(string);
- //string_optional_copy_.assign(boost::begin(string_range), boost::end(string_range));
- string_optional_copy_.clear();
- string_optional_copy_.insert(string_optional_copy_.end(),
- boost::begin(string_range), boost::end(string_range));
- string_range_ = string_optional_copy_;
- string_has_changed_ = true;
- }
+ //! Changes the string to be searched for.
+ /*!
+ \param string Either a range (or character array) corresponding to the new substring,
+ or a pointer to a sequence of type \c string_type
+ */
+ template <typename RangeT>
+ void set_string(RangeT const &string,
+ typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<RangeT,Sequence2T> >::type* = 0)
+ {
+ boost::iterator_range<typename boost::range_const_iterator<RangeT>::type> string_range =
+ boost::as_literal(string);
+ //string_optional_copy_.assign(boost::begin(string_range), boost::end(string_range));
+ string_optional_copy_.clear();
+ string_optional_copy_.insert(string_optional_copy_.end(),
+ boost::begin(string_range), boost::end(string_range));
+ string_range_ = string_optional_copy_;
+ string_has_changed_ = true;
+ }
             
- void set_string (Sequence2T *const string = 0)
- {
- string_optional_copy_.clear();
- if (string)
- string_range_ = *string;
- else
- string_range_ = string_optional_copy_;
- string_has_changed_ = true;
- }
+ void set_string (Sequence2T *const string = 0)
+ {
+ string_optional_copy_.clear();
+ if (string)
+ string_range_ = *string;
+ else
+ string_range_ = string_optional_copy_;
+ string_has_changed_ = true;
+ }
 
 # ifdef BOOST_HAS_RVALUE_REFS
- void set_string (
- Sequence2T &&string)
- {
- string_optional_copy_ = std::move(string);
- string_range_ = string_optional_copy_;
- string_has_changed_ = true;
- }
+ void set_string (
+ Sequence2T &&string)
+ {
+ string_optional_copy_ = std::move(string);
+ string_range_ = string_optional_copy_;
+ string_has_changed_ = true;
+ }
 # endif
 
- //! \todo Change the object's substring or just use a temporary one?
- //! \todo Maybe this shouldn't be a part of finder_t, but a part of a certain AdditionalBehaviorT
- //! Perform a search...
- /*!
- \deprecated Only implemented to preserve compatibility
- with the previous Finder concept
- \todo This should probably only exist to classes that derive from finder_t (such as first_finder_t etc.)
- */
- string_range_type operator()(string_iterator_type const &string_start,
- string_iterator_type const &string_end)
- {
- set_string( boost::make_iterator_range(string_start, string_end) );
- return find_first();
- }
+ //! \todo Change the object's substring or just use a temporary one?
+ //! \todo Maybe this shouldn't be a part of finder_t, but a part of a certain AdditionalBehaviorT
+ //! Perform a search...
+ /*!
+ \deprecated Only implemented to preserve compatibility
+ with the previous Finder concept
+ \todo This should probably only exist to classes that derive from finder_t (such as first_finder_t etc.)
+ */
+ string_range_type operator()(string_iterator_type const &string_start,
+ string_iterator_type const &string_end)
+ {
+ set_string( boost::make_iterator_range(string_start, string_end) );
+ return find_first();
+ }
 
- void use_internal_string()
- {
- string_has_changed_ = true;
- find_reset();
- string_range_ = string_optional_copy_;
- }
+ void use_internal_string()
+ {
+ string_has_changed_ = true;
+ find_reset();
+ string_range_ = string_optional_copy_;
+ }
            
- //! Performs a search using the chosen algorithm.
- /*!
- Looks for the first match of the substring in the string.
- \return An iterator range indicating where in the string
- a match has occured. If there is no match, an iterator range with
- begin()==end()==string.end() is returned.
- \pre The iterator ranges for the string and substring must be set.
- \post The internal find offset is set immediately after the current match starts.
- \note This is semantically equivalent to \c find_reset(); match=find_next();
- */
-
- //!\todo Must return a range of const iterators, otherwise one could modify
- //! the range's contents, range which may actually
- //! be data of our private member
- string_range_type find_first()
- {
- //assert(substring_ && string_);
- //return algorithm_type::find(boost::begin(string_));
- find_reset();
- return find_next();
- }
+ //! Performs a search using the chosen algorithm.
+ /*!
+ Looks for the first match of the substring in the string.
+ \return An iterator range indicating where in the string
+ a match has occurred. If there is no match, an iterator range with
+ begin()==end()==string.end() is returned.
+ \pre The iterator ranges for the string and substring must be set.
+ \post The internal find offset is set immediately after the current match starts.
+ \note This is semantically equivalent to \c find_reset(); match=find_next();
+ */
 
- string_difference_type find_first_index()
- {
- find_reset();
- return find_next_index();
- }
+ //!\todo Must return a range of const iterators, otherwise one could modify
+ //! the range's contents, range which may actually
+ //! be data of our private member
+ string_range_type find_first()
+ {
+ //assert(substring_ && string_);
+ //return algorithm_type::find(boost::begin(string_));
+ find_reset();
+ return find_next();
+ }
+
+ string_difference_type find_first_index()
+ {
+ find_reset();
+ return find_next_index();
+ }
 
- string_range_type find_next()
+ string_range_type find_next()
+ {
+ apply_changes();
+ if (start_offset_ == boost::end(string_range_))
+ return boost::iterator_range<string_iterator_type>(
+ start_offset_, start_offset_
+ );
+ string_range_type ret =
+ algorithm_type::find(start_offset_);
+ if (boost::begin(ret) == boost::end(string_range_))
             {
- apply_changes();
- if (start_offset_ == boost::end(string_range_))
- return boost::iterator_range<string_iterator_type>(
- start_offset_, start_offset_
- );
- string_range_type ret =
- algorithm_type::find(start_offset_);
- if (boost::begin(ret) == boost::end(string_range_))
- {
- start_offset_ = boost::end(string_range_);
- return ret;
- }
- else
- {
- start_offset_ = boost::begin(ret);
- ++start_offset_;
- return ret;
- }
+ start_offset_ = boost::end(string_range_);
+ return ret;
             }
-
- string_difference_type find_next_index()
+ else
             {
- apply_changes();
-
- //empty substring
- if (boost::begin(substring_range_) == boost::end(substring_range_))
- {
- if (start_offset_ == boost::end(string_range_))
- return static_cast<string_difference_type>(-1);
- return std::distance(boost::begin(string_range_),start_offset_++);
- }
-
- string_range_type const &match = find_next();
- if (boost::begin(match) == boost::end(string_range_))
- return static_cast<string_difference_type>(-1);
- return std::distance(boost::begin(string_range_), boost::begin(match));
+ start_offset_ = boost::begin(ret);
+ ++start_offset_;
+ return ret;
             }
+ }
 
- void find_reset()
+ string_difference_type find_next_index()
+ {
+ apply_changes();
+
+ //empty substring
+ if (boost::begin(substring_range_) == boost::end(substring_range_))
             {
- start_offset_ = boost::begin(string_range_);
+ //empty string, empty substring
+ //!\todo if this gets called more times, it always returns 0
+ //! i.e. the pointer is not moved. what would be a good solution for that?
+ //! maybe a special dummy value for the range?
+ if (boost::begin(string_range_) == boost::end(string_range_))
+ return static_cast<string_difference_type>(0);
+ //empty substring, offset at the end of the range
+ if (start_offset_ == boost::end(string_range_))
+ return static_cast<string_difference_type>(-1);
+ //empty substring, offset not at the end of range
+ return std::distance(boost::begin(string_range_),start_offset_++);
             }
-
- //! \todo: Figure out whether you want to make finder iterators or not. find_iterator can be used otherwise.
- //const_iterator begin() const { }
- //const_iterator end() const { }
- private:
- inline void apply_changes()
+ else if (boost::begin(string_range_) == boost::end(string_range_))
             {
- if (substring_has_changed_ || string_has_changed_) {
- find_reset();
- if (substring_has_changed_) {
- on_substring_change();
- substring_has_changed_ = false;
- }
- if (string_has_changed_) {
- on_string_change();
- string_has_changed_ = false;
- }
- }
+ //empty string, nonempty substring
+ return static_cast<string_difference_type>(-1);
             }
 
- substring_type substring_optional_copy_;
- string_type string_optional_copy_;
- comparator_type comparator_;
- allocator_type allocator_;
- substring_range_type substring_range_;
- string_range_type string_range_;
- string_iterator_type start_offset_;
- bool substring_has_changed_, string_has_changed_;
- };
-
- struct finder_no_additional_behavior
- { template <class,class,class,class,class,class> struct apply { }; };
- //struct finder_functor_first_finder {};
- //struct finder_functor_last_finder {};
- //struct finder_functor_nth_finder {};
-
-
-// Finder generators ------------------------------------------//
-
- //! "First" finder generator
- /*!
- Construct the \c first_finder_t. The finder searches for the first
- occurrence of the string in a given input.
- The result is given as an \c iterator_range delimiting the match.
-
- \param Search A substring to be searched for.
- \param Comp An element comparison predicate
- \return An instance of the \c first_finder object
- */
- template<typename RangeT>
- inline detail::first_finderF<
- BOOST_STRING_TYPENAME range_const_iterator<RangeT>::type,
- is_equal>
- first_finder( const RangeT& Search )
- {
- return
- detail::first_finderF<
- BOOST_STRING_TYPENAME
- range_const_iterator<RangeT>::type,
- is_equal>( ::boost::as_literal(Search), is_equal() ) ;
- }
-
- //! "First" finder
- /*!
- \overload
- */
- template<typename RangeT,typename PredicateT>
- inline detail::first_finderF<
- BOOST_STRING_TYPENAME range_const_iterator<RangeT>::type,
- PredicateT>
- first_finder(
- const RangeT& Search, PredicateT Comp )
- {
- return
- detail::first_finderF<
- BOOST_STRING_TYPENAME
- range_const_iterator<RangeT>::type,
- PredicateT>( ::boost::as_literal(Search), Comp );
+ //perform an actual search
+ string_range_type const &match = find_next();
+ if (boost::begin(match) == boost::end(string_range_))
+ return static_cast<string_difference_type>(-1);
+ return std::distance(boost::begin(string_range_), boost::begin(match));
         }
 
- //! "Last" finder
- /*!
- Construct the \c last_finder. The finder searches for the last
- occurrence of the string in a given input.
- The result is given as an \c iterator_range delimiting the match.
-
- \param Search A substring to be searched for.
- \param Comp An element comparison predicate
- \return An instance of the \c last_finder object
- */
- template<typename RangeT>
- inline detail::last_finderF<
- BOOST_STRING_TYPENAME range_const_iterator<RangeT>::type,
- is_equal>
- last_finder( const RangeT& Search )
- {
- return
- detail::last_finderF<
- BOOST_STRING_TYPENAME
- range_const_iterator<RangeT>::type,
- is_equal>( ::boost::as_literal(Search), is_equal() );
- }
- //! "Last" finder
- /*!
- \overload
- */
- template<typename RangeT, typename PredicateT>
- inline detail::last_finderF<
- BOOST_STRING_TYPENAME range_const_iterator<RangeT>::type,
- PredicateT>
- last_finder( const RangeT& Search, PredicateT Comp )
- {
- return
- detail::last_finderF<
- BOOST_STRING_TYPENAME
- range_const_iterator<RangeT>::type,
- PredicateT>( ::boost::as_literal(Search), Comp ) ;
- }
-
- //! "Nth" finder
- /*!
- Construct the \c nth_finder. The finder searches for the n-th (zero-indexed)
- occurrence of the string in a given input.
- The result is given as an \c iterator_range delimiting the match.
-
- \param Search A substring to be searched for.
- \param Nth An index of the match to be find
- \param Comp An element comparison predicate
- \return An instance of the \c nth_finder object
- */
- template<typename RangeT>
- inline detail::nth_finderF<
- BOOST_STRING_TYPENAME range_const_iterator<RangeT>::type,
- is_equal>
- nth_finder(
- const RangeT& Search,
- int Nth)
- {
- return
- detail::nth_finderF<
- BOOST_STRING_TYPENAME
- range_const_iterator<RangeT>::type,
- is_equal>( ::boost::as_literal(Search), Nth, is_equal() ) ;
- }
- //! "Nth" finder
- /*!
- \overload
- */
- template<typename RangeT, typename PredicateT>
- inline detail::nth_finderF<
- BOOST_STRING_TYPENAME range_const_iterator<RangeT>::type,
- PredicateT>
- nth_finder(
- const RangeT& Search,
- int Nth,
- PredicateT Comp )
- {
- return
- detail::nth_finderF<
- BOOST_STRING_TYPENAME
- range_const_iterator<RangeT>::type,
- PredicateT>( ::boost::as_literal(Search), Nth, Comp );
- }
-
- //! "Head" finder
- /*!
- Construct the \c head_finder. The finder returns a head of a given
- input. The head is a prefix of a string up to n elements in
- size. If an input has less then n elements, whole input is
- considered a head.
- The result is given as an \c iterator_range delimiting the match.
-
- \param N The size of the head
- \return An instance of the \c head_finder object
- */
- inline detail::head_finderF
- head_finder( int N )
+ void find_reset()
         {
- return detail::head_finderF(N);
+ start_offset_ = boost::begin(string_range_);
         }
-
- //! "Tail" finder
- /*!
- Construct the \c tail_finder. The finder returns a tail of a given
- input. The tail is a suffix of a string up to n elements in
- size. If an input has less then n elements, whole input is
- considered a head.
- The result is given as an \c iterator_range delimiting the match.
 
- \param N The size of the head
- \return An instance of the \c tail_finder object
- */
- inline detail::tail_finderF
- tail_finder( int N )
+ //! \todo: Figure out whether you want to make finder iterators or not. find_iterator can be used otherwise.
+ //const_iterator begin() const { }
+ //const_iterator end() const { }
+ private:
+ inline void apply_changes()
         {
- return detail::tail_finderF(N);
+ if (substring_has_changed_ || string_has_changed_) {
+ find_reset();
+ if (substring_has_changed_) {
+ on_substring_change();
+ substring_has_changed_ = false;
+ }
+ if (string_has_changed_) {
+ on_string_change();
+ string_has_changed_ = false;
+ }
+ }
         }
 
- //! "Token" finder
- /*!
- Construct the \c token_finder. The finder searches for a token
- specified by a predicate. It is similar to std::find_if
- algorithm, with an exception that it return a range of
- instead of a single iterator.
-
- If "compress token mode" is enabled, adjacent matching tokens are
- concatenated into one match. Thus the finder can be used to
- search for continuous segments of characters satisfying the
- given predicate.
-
- The result is given as an \c iterator_range delimiting the match.
-
- \param Pred An element selection predicate
- \param eCompress Compress flag
- \return An instance of the \c token_finder object
- */
- template< typename PredicateT >
- inline detail::token_finderF<PredicateT>
- token_finder(
- PredicateT Pred,
- token_compress_mode_type eCompress=token_compress_off )
+ substring_type substring_optional_copy_;
+ string_type string_optional_copy_;
+ comparator_type comparator_;
+ allocator_type allocator_;
+ substring_range_type substring_range_;
+ string_range_type string_range_;
+ string_iterator_type start_offset_;
+ bool substring_has_changed_, string_has_changed_;
+ };
+
+ //struct finder_no_additional_behavior
+ //{ template <class,class,class,class,class,class> struct apply { }; };
+
+ struct finder_behavior_first_finder
+ {
+ template <class FinderT,class Range1T,class Range2T,class AlgorithmT,class ComparatorT,class AllocatorT>
+ struct apply
         {
- return detail::token_finderF<PredicateT>( Pred, eCompress );
- }
-
- //! "Range" finder
- /*!
- Construct the \c range_finder. The finder does not perform
- any operation. It simply returns the given range for
- any input.
-
- \param Begin Beginning of the range
- \param End End of the range
- \param Range The range.
- \return An instance of the \c range_finger object
- */
- template< typename ForwardIteratorT >
- inline detail::range_finderF<ForwardIteratorT>
- range_finder(
- ForwardIteratorT Begin,
- ForwardIteratorT End )
+ private:
+ typedef boost::algorithm::detail::finder_typedefs<Range1T, Range2T, ComparatorT, AllocatorT> typedefs;
+ public:
+ typename typedefs::string_range_type operator()
+ (typename typedefs::string_iterator_type &begin, typename typedefs::string_iterator_type &end)
+ {
+ //!\todo impl
+ //boost::iterator_range<string_iterator_type>
+ //set_string();
+ }
+ };
+ };
+ struct finder_behavior_last_finder
+ {
+ template <class FinderT,class Range1T,class Range2T,class AlgorithmT,class ComparatorT,class AllocatorT>
+ struct apply
         {
- return detail::range_finderF<ForwardIteratorT>( Begin, End );
- }
-
- //! "Range" finder
- /*!
- \overload
- */
- template< typename ForwardIteratorT >
- inline detail::range_finderF<ForwardIteratorT>
- range_finder( iterator_range<ForwardIteratorT> Range )
+ };
+ };
+ struct finder_behavior_nth_finder
+ {
+ template <class FinderT,class Range1T,class Range2T,class AlgorithmT,class ComparatorT,class AllocatorT>
+ struct apply
         {
- return detail::range_finderF<ForwardIteratorT>( Range );
- }
 
- } // namespace algorithm
+ };
+ };
+
+// Finder generators ------------------------------------------//
+
+ //! "First" finder generator
+ /*!
+ Construct the \c first_finder_t. The finder searches for the first
+ occurrence of the string in a given input.
+ The result is given as an \c iterator_range delimiting the match.
+
+ \param Search A substring to be searched for.
+ \param Comp An element comparison predicate
+ \return An instance of the \c first_finder object
+ \deprecated
+ */
+ /*template<typename RangeT>
+ inline detail::first_finderF<
+ BOOST_STRING_TYPENAME range_const_iterator<RangeT>::type,
+ is_equal>
+ first_finder( const RangeT& Search )
+ {
+ return
+ detail::first_finderF<
+ BOOST_STRING_TYPENAME
+ range_const_iterator<RangeT>::type,
+ is_equal>( ::boost::as_literal(Search), is_equal() ) ;
+ }*/
+
+ //! "First" finder
+ /*!
+ \overload
+ \deprecated
+ */
+ /*template<typename RangeT,typename PredicateT>
+ inline detail::first_finderF<
+ BOOST_STRING_TYPENAME range_const_iterator<RangeT>::type,
+ PredicateT>
+ first_finder(
+ const RangeT& Search, PredicateT Comp )
+ {
+ return
+ detail::first_finderF<
+ BOOST_STRING_TYPENAME
+ range_const_iterator<RangeT>::type,
+ PredicateT>( ::boost::as_literal(Search), Comp );
+ }*/
+ template<typename RangeT,typename PredicateT, typename AlgorithmTagT>
+ inline boost::algorithm::simplified_finder_t<RangeT, RangeT, typename AlgorithmTagT::type,
+ PredicateT, std::allocator<std::size_t>, boost::algorithm::finder_behavior_first_finder>
+ first_finder(
+ const RangeT& Search, PredicateT const& Comp,
+ AlgorithmTagT const&)
+ {
+ boost::algorithm::simplified_finder_t<RangeT, RangeT, typename AlgorithmTagT::type,
+ PredicateT, std::allocator<std::size_t>, boost::algorithm::finder_behavior_first_finder>
+ finder;
+ finder.set_substring(&Search);
+ return finder;
+ }
+
+ template<typename RangeT,typename PredicateT>
+ inline boost::algorithm::simplified_finder_t<RangeT, RangeT, boost::algorithm::default_finder_algorithm,
+ PredicateT, std::allocator<std::size_t>, boost::algorithm::finder_behavior_first_finder>
+ first_finder(
+ const RangeT& Search, PredicateT const& Comp)
+ {
+ return boost::algorithm::first_finder(Search,Comp, boost::algorithm::default_finder_algorithm_tag());
+ }
+
+ template<typename RangeT>
+ inline boost::algorithm::simplified_finder_t<RangeT, RangeT, boost::algorithm::default_finder_algorithm,
+ boost::algorithm::is_equal, std::allocator<std::size_t>, boost::algorithm::finder_behavior_first_finder>
+ first_finder(
+ const RangeT& Search)
+ {
+ return boost::algorithm::first_finder(Search,
+ boost::algorithm::is_equal(), boost::algorithm::default_finder_algorithm_tag());
+ }
+ //! "Last" finder
+ /*!
+ Construct the \c last_finder. The finder searches for the last
+ occurrence of the string in a given input.
+ The result is given as an \c iterator_range delimiting the match.
+
+ \param Search A substring to be searched for.
+ \param Comp An element comparison predicate
+ \return An instance of the \c last_finder object
+ \deprecated
+ */
+ /*template<typename RangeT>
+ inline detail::last_finderF<
+ BOOST_STRING_TYPENAME range_const_iterator<RangeT>::type,
+ is_equal>
+ last_finder( const RangeT& Search )
+ {
+ return
+ detail::last_finderF<
+ BOOST_STRING_TYPENAME
+ range_const_iterator<RangeT>::type,
+ is_equal>( ::boost::as_literal(Search), is_equal() );
+ }*/
+ //! "Last" finder
+ /*!
+ \overload
+ */
+ template<typename RangeT, typename PredicateT, typename AlgorithmTagT>
+ inline boost::algorithm::simplified_finder_t<RangeT, RangeT, typename AlgorithmTagT::type,
+ PredicateT, std::allocator<std::size_t>, boost::algorithm::finder_behavior_last_finder>
+ last_finder( const RangeT& Search, PredicateT const &Comp,
+ AlgorithmTagT const&)
+ {
+ boost::algorithm::simplified_finder_t<RangeT, RangeT, typename AlgorithmTagT::type,
+ PredicateT, std::allocator<std::size_t>, boost::algorithm::finder_behavior_last_finder>
+ finder;
+ finder.set_substring(&Search);
+ return finder;
+ }
+
+ template<typename RangeT, typename PredicateT>
+ inline boost::algorithm::simplified_finder_t<RangeT, RangeT, boost::algorithm::default_finder_algorithm,
+ PredicateT, std::allocator<std::size_t>, boost::algorithm::finder_behavior_last_finder>
+ last_finder( const RangeT& Search, PredicateT const &Comp)
+ {
+ return boost::algorithm::last_finder(Search, Comp,
+ boost::algorithm::default_finder_algorithm_tag());
+ }
+
+ template<typename RangeT>
+ inline boost::algorithm::simplified_finder_t<RangeT, RangeT, boost::algorithm::default_finder_algorithm,
+ boost::algorithm::is_equal, std::allocator<std::size_t>, boost::algorithm::finder_behavior_last_finder>
+ last_finder( const RangeT& Search)
+ {
+ return boost::algorithm::last_finder(Search,
+ boost::algorithm::is_equal(), boost::algorithm::default_finder_algorithm_tag());
+ }
+
+ //! "Nth" finder
+ /*!
+ Construct the \c nth_finder. The finder searches for the n-th (zero-indexed)
+ occurrence of the string in a given input.
+ The result is given as an \c iterator_range delimiting the match.
+
+ \param Search A substring to be searched for.
+ \param Nth An index of the match to be find
+ \param Comp An element comparison predicate
+ \return An instance of the \c nth_finder object
+ \deprecated
+ */
+ /*template<typename RangeT>
+ inline detail::nth_finderF<
+ BOOST_STRING_TYPENAME range_const_iterator<RangeT>::type,
+ is_equal>
+ nth_finder(
+ const RangeT& Search,
+ int Nth)
+ {
+ return
+ detail::nth_finderF<
+ BOOST_STRING_TYPENAME
+ range_const_iterator<RangeT>::type,
+ is_equal>( ::boost::as_literal(Search), Nth, is_equal() ) ;
+ }*/
+ //! "Nth" finder
+ /*!
+ \overload
+ \deprecated
+ */
+ template<typename RangeT, typename PredicateT, typename AlgorithmTagT>
+ inline boost::algorithm::simplified_finder_t<RangeT, RangeT, typename AlgorithmTagT::type,
+ PredicateT, std::allocator<std::size_t>, boost::algorithm::finder_behavior_nth_finder>
+ nth_finder(const RangeT& Search, int Nth, PredicateT const &Comp, AlgorithmTagT const &)
+ {
+ boost::algorithm::simplified_finder_t<RangeT, RangeT, typename AlgorithmTagT::type,
+ PredicateT, std::allocator<std::size_t>, boost::algorithm::finder_behavior_nth_finder>
+ finder;
+ finder.set_N(Nth);
+ finder.set_substring(&Search);
+ return finder;
+ }
+
+ template<typename RangeT, typename PredicateT>
+ inline boost::algorithm::simplified_finder_t<RangeT, RangeT, boost::algorithm::is_equal,
+ PredicateT, std::allocator<std::size_t>, boost::algorithm::finder_behavior_nth_finder>
+ nth_finder(const RangeT& Search, int Nth, PredicateT const &Comp)
+ {
+ return boost::algorithm::nth_finder(Search, Nth, Comp,
+ boost::algorithm::default_finder_algorithm_tag());
+ }
+
+ template<typename RangeT>
+ inline boost::algorithm::simplified_finder_t<RangeT, RangeT, boost::algorithm::is_equal,
+ boost::algorithm::is_equal, std::allocator<std::size_t>, boost::algorithm::finder_behavior_nth_finder>
+ nth_finder(const RangeT& Search, int Nth)
+ {
+ return boost::algorithm::nth_finder(Search, Nth, boost::algorithm::is_equal(),
+ boost::algorithm::default_finder_algorithm_tag());
+ }
+
+ //! "Head" finder
+ /*!
+ Construct the \c head_finder. The finder returns a head of a given
+ input. The head is a prefix of a string up to n elements in
+ size. If an input has less then n elements, whole input is
+ considered a head.
+ The result is given as an \c iterator_range delimiting the match.
+
+ \param N The size of the head
+ \return An instance of the \c head_finder object
+ */
+ inline detail::head_finderF
+ head_finder( int N )
+ {
+ return detail::head_finderF(N);
+ }
+
+ //! "Tail" finder
+ /*!
+ Construct the \c tail_finder. The finder returns a tail of a given
+ input. The tail is a suffix of a string up to n elements in
+ size. If an input has less then n elements, whole input is
+ considered a head.
+ The result is given as an \c iterator_range delimiting the match.
+
+ \param N The size of the head
+ \return An instance of the \c tail_finder object
+ */
+ inline detail::tail_finderF
+ tail_finder( int N )
+ {
+ return detail::tail_finderF(N);
+ }
+
+ //! "Token" finder
+ /*!
+ Construct the \c token_finder. The finder searches for a token
+ specified by a predicate. It is similar to std::find_if
+ algorithm, with an exception that it return a range of
+ instead of a single iterator.
+
+ If "compress token mode" is enabled, adjacent matching tokens are
+ concatenated into one match. Thus the finder can be used to
+ search for continuous segments of characters satisfying the
+ given predicate.
+
+ The result is given as an \c iterator_range delimiting the match.
+
+ \param Pred An element selection predicate
+ \param eCompress Compress flag
+ \return An instance of the \c token_finder object
+ */
+ template< typename PredicateT >
+ inline detail::token_finderF<PredicateT>
+ token_finder(
+ PredicateT Pred,
+ token_compress_mode_type eCompress=token_compress_off )
+ {
+ return detail::token_finderF<PredicateT>( Pred, eCompress );
+ }
+
+ //! "Range" finder
+ /*!
+ Construct the \c range_finder. The finder does not perform
+ any operation. It simply returns the given range for
+ any input.
+
+ \param Begin Beginning of the range
+ \param End End of the range
+ \param Range The range.
+ \return An instance of the \c range_finger object
+ */
+ template< typename ForwardIteratorT >
+ inline detail::range_finderF<ForwardIteratorT>
+ range_finder(
+ ForwardIteratorT Begin,
+ ForwardIteratorT End )
+ {
+ return detail::range_finderF<ForwardIteratorT>( Begin, End );
+ }
+
+ //! "Range" finder
+ /*!
+ \overload
+ */
+ template< typename ForwardIteratorT >
+ inline detail::range_finderF<ForwardIteratorT>
+ range_finder( iterator_range<ForwardIteratorT> Range )
+ {
+ return detail::range_finderF<ForwardIteratorT>( Range );
+ }
+
+} } // namespace algorithm, namespace boost
 
+namespace boost
+{
     // pull the names to the boost namespace
     using algorithm::first_finder;
     using algorithm::last_finder;
@@ -888,6 +1061,11 @@
     //! \TODO: any other finder types?
     using algorithm::finder_t;
     using algorithm::simplified_finder_t;
+ //using algorithm::finder_no_additional_behavior;
+ using algorithm::finder_behavior_first_finder;
+ using algorithm::finder_behavior_last_finder;
+ using algorithm::finder_behavior_nth_finder;
+ using algorithm::default_finder_algorithm;
 } // namespace boost
 
 

Added: sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder_aliases.hpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder_aliases.hpp 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
@@ -0,0 +1,52 @@
+#ifndef BOOST_STRING_FINDER_ALIASES_HPP
+#define BOOST_STRING_FINDER_ALIASES_HPP
+
+namespace boost { namespace algorithm {
+ //Naive Search
+ typedef boost::algorithm::finder_t<std::string, std::string,
+ boost::algorithm::naive_search> naive_search_finder;
+ typedef boost::algorithm::finder_t<std::wstring, std::wstring,
+ boost::algorithm::naive_search> wnaive_search_finder;
+
+ //Knuth-Morris-Pratt
+ typedef boost::algorithm::finder_t<std::string, std::string,
+ boost::algorithm::knuth_morris_pratt> knuth_morris_pratt_finder;
+ typedef boost::algorithm::finder_t<std::wstring, std::wstring,
+ boost::algorithm::knuth_morris_pratt> wknuth_morris_pratt_finder;
+
+ //Boyer Moore
+ typedef boost::algorithm::finder_t<std::string, std::string,
+ boost::algorithm::boyer_moore> boyer_moore_finder;
+ typedef boost::algorithm::finder_t<std::wstring, std::wstring,
+ boost::algorithm::boyer_moore> wboyer_moore_finder;
+
+ //Rabin Karp
+ typedef boost::algorithm::finder_t<std::string, std::string,
+ boost::algorithm::rabin_karp32> rabin_karp32_finder;
+ typedef boost::algorithm::finder_t<std::wstring, std::wstring,
+ boost::algorithm::rabin_karp32> wrabin_karp32_finder;
+
+ typedef boost::algorithm::finder_t<std::string, std::string,
+ boost::algorithm::rabin_karp64> rabin_karp64_finder;
+ typedef boost::algorithm::finder_t<std::wstring, std::wstring,
+ boost::algorithm::rabin_karp64> wrabin_karp64_finder;
+
+ //Suffix array
+ typedef boost::algorithm::finder_t<std::string, std::string,
+ boost::algorithm::suffix_array_search> suffix_array_finder;
+ typedef boost::algorithm::finder_t<std::wstring, std::wstring,
+ boost::algorithm::suffix_array_search> wsuffix_array_finder;
+
+} } // namespace algorithm, namespace boost
+
+namespace boost
+{
+ using algorithm::naive_search_finder; using algorithm::wnaive_search_finder;
+ using algorithm::knuth_morris_pratt_finder; using algorithm::wknuth_morris_pratt_finder;
+ using algorithm::boyer_moore_finder; using algorithm::wboyer_moore_finder;
+ using algorithm::rabin_karp32_finder; using algorithm::wrabin_karp32_finder;
+ using algorithm::rabin_karp64_finder; using algorithm::wrabin_karp64_finder;
+ using algorithm::suffix_array_finder; using algorithm::wsuffix_array_finder;
+}
+
+#endif

Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/split.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/split.hpp (original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/split.hpp 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
@@ -67,7 +67,7 @@
             return ::boost::algorithm::iter_find(
                 Result,
                 Input,
- ::boost::algorithm::first_finder(Search) );
+ ::boost::algorithm::first_finder(Search) );
         }
 
         //! Find all algorithm ( case insensitive )
@@ -146,7 +146,7 @@
             return ::boost::algorithm::iter_split(
                 Result,
                 Input,
- ::boost::algorithm::token_finder( Pred, eCompress ) );
+ ::boost::algorithm::token_finder( Pred, eCompress ) );
         }
 
     } // namespace algorithm

Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/boyer_moore.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/boyer_moore.hpp (original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/boyer_moore.hpp 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
@@ -21,6 +21,8 @@
 
 #include <boost/unordered_map.hpp>
 #include <boost/functional/hash.hpp>
+#include <boost/utility/enable_if.hpp>
+#include <locale>
 
 namespace boost { namespace algorithm {
     struct boyer_moore
@@ -62,21 +64,47 @@
             inline void on_string_change()
             { }
         private:
- struct compute_first_table_functor {
+ struct compute_first_table_functor
+ {
+ //Case 1: ComparatorT=boost::algorithm::is_equal
                 void operator()(typename boost::call_traits<substring_char_type>::param_type c)
                 {
+ compute_first_table<comparator_type>(c);
+ }
+ compute_first_table_functor (algorithm &alg) : idx_(0), alg_(alg)
+ { alg_.table1.clear(); }
+ private:
+ template <class ComparatorTT>
+ void compute_first_table(typename boost::call_traits<substring_char_type>::param_type c,
+ typename boost::enable_if<
+ typename boost::is_same<ComparatorTT, boost::algorithm::is_equal> >::type* =0)
+ {
                     //alg_.table1.insert( std::make_pair(c, alg_.substr_size_ - 1 - idx_) );
                     if (idx_ != 0) alg_.table1.insert(std::make_pair(c, idx_));
 
                     ++idx_;
                 }
- compute_first_table_functor (algorithm &alg) : idx_(0), alg_(alg)
- { alg_.table1.clear(); }
- private:
+
+ //Case 2: ComparatorT=boost::algorithm::is_iequal
+ template <class ComparatorTT>
+ void compute_first_table(typename boost::call_traits<substring_char_type>::param_type c,
+ typename boost::enable_if<
+ typename boost::is_same<ComparatorTT, boost::algorithm::is_iequal> >::type* =0)
+ {
+ //Case insensitive matches are supported properly for char (in case of Boyer-Moore)
+ BOOST_STATIC_ASSERT((boost::is_same<substring_char_type,char>::value));
+
+ //!\todo get locale from user?
+ if (idx_ != 0) alg_.table1.insert(std::make_pair(
+ std::tolower(c, std::locale()), idx_
+ ));
+ ++idx_;
+ }
                 std::size_t idx_;
                 algorithm &alg_;
             };
 
+#if 0
             struct compute_second_table_functor {
                 void operator()(typename boost::call_traits<substring_char_type>::param_type c)
                 {
@@ -88,6 +116,7 @@
                 std::size_t idx_;
                 algorithm &alg_;
             };
+#endif
 
             //precomputation on pattern=bidirectional range
             void on_substring_change(std::bidirectional_iterator_tag)
@@ -102,8 +131,127 @@
                     compute_first_table_functor(*this));
                 
                 //Compute the second table
- boost::for_each(substr | boost::adaptors::reversed,
- compute_second_table_functor(*this));
+
+ //this is a table similar to the one in the KMP algorithm
+ //failure_func[i]=k means k is the size of the biggest boundary of the string P[i..m-1]
+ //(which is not the string itself)
+ //i.e. P[i..i+k+1] = P[..m-1]
+ std::vector<std::size_t, typename allocator_type::template rebind<std::size_t>::other>
+ failure_func(substr_size_);
+
+
+ //The second table: delta2(x) = m-1-i + min(d1(x),d2(x)) <= 2m-1-i
+ table2.clear(); table2.reserve(substr_size_);
+ for (unsigned int i = 0; i < substr_size_; ++i)
+ table2.push_back(2*substr_size_ - 1 - i); // initialize with m-1-i+m=2m-1-i (maximum)
+ //table2.push_back(substr_size_);
+
+ if (substr_size_ < 2) return;
+
+ substring_iterator_type const &pattern = boost::begin(substr);
+
+ //!\todo find a better solution for this
+ for (unsigned int i = substr_size_-1;i--;)
+ {
+ if (!comp(pattern[i],pattern[substr_size_-1]))
+ {
+ table2[substr_size_-1] = substr_size_ - 1 - i;
+ break;
+ }
+ }
+
+ //feel free to refactor if you can find better names heh
+ std::size_t i = substr_size_ - 1,j = substr_size_ - 2;
+ //it's zero-initialized anyway
+ ///failure_func[substr_size_-1] = 0; // the one-character string cannot have boundaries
+
+ //Start computing failure_func, and at the same time use the obtained values
+ //to compute the first part of the second table (d1)
+ while (true) // the condition is j>=0, checked at the end.
+ {
+ //Invariant: P[i+1..m-1] = P[j+1..m-1]
+ //try to align with pattern indexed by j (by sliding the pattern indexed by i)
+ while (i != substr_size_ - 1 && !comp(pattern[i],pattern[j]))
+ i = substr_size_ - 1 - failure_func[i + 1];
+
+ //Invariant: Either i=m-1 or P[i..m-1] = P[j..m-1]
+ while (i == substr_size_-1 && j > 0 && !comp(pattern[substr_size_-1],pattern[j]))
+ {
+ //couldn't align the given j with any i
+ failure_func[j] = 0;
+ --j;
+ }
+ //Invariant: either (j==0 and i=m-1) or P[i..m-1] = P[j..m-1]
+ if (j == 0 && i == substr_size_-1 && !comp(pattern[0],pattern[substr_size_-1]))
+ {
+ failure_func[0] = 0;
+ }
+ else failure_func[j] = substr_size_-i;
+
+ //we only use the newly computed value pi(j)=a if j and a are nonzero.
+ if (j != 0)
+ {
+ //Invariant: P[i..m-1] = P[j..m-1] (i and j are aligned)
+ /*
+ Let B_P(i) be the set of all boundary sizes of P[i..m-1]
+ B_P(i) = { k+1 | (k=-1) OR (P[i..i+k] = P[m-1-k..m-1]) }
+ (k+1 because P[i..i+k] contains k+1 characters)
+ (k=-1 because k+1=0 is always a valid boundary size -- the empty boundary)
+ The set B_P(j) is computed at this point, based on the assignment on
+ the failure function above.
+
+ We want to compute:
+ d1(x) = min { k | (k=m) OR ((P[x+1..m-1] = P[x+1-k..m-1-k]) AND P[i]!=P[i-k]) }
+ for x<m-1 and k>0
+ This is equivalent to:
+ d1(x) = min { k | (k=m) OR (m-1-x \in B_P(x+1-k) AND P[i]!=P[i-k]) }
+
+ Whenever a mismatch occurs whilist matching this pattern against a text,
+ d1(x) tells us how much the pattern should shift in order for it to be able
+ to match.
+
+ */
+
+ //for all nonzero a \in B_P(j):
+
+ //Using the definition of d1 above:
+ //a = m-1-x <=> x = m-1-a
+ //j = x+1-k <=> k = m-a-j
+ unsigned int a = failure_func[j];
+ while (a > 0 && !comp(pattern[substr_size_-1-a], pattern[j-1]))
+ {
+ assert(substr_size_-1-a >= substr_size_-a-j); // x >= k
+ if (table2[substr_size_-1-a] > 2*substr_size_-i-1 -a-j)
+ table2[substr_size_-1-a] = 2*substr_size_-i-1 -a-j;
+ //if (table2[substr_size_-1-a] > substr_size_-a-j)
+ // table2[substr_size_-1-a] = substr_size_-a-j;
+
+ a = failure_func[substr_size_ - a]; // get the next boundary size in B_P(j)
+ }
+ }
+ if (j > 0) { --j; --i; }
+ else break;
+ }
+ //We're done computing the reverse KMP function and the first part of the second table
+ //now for the second part:
+ //d2(i) = min { k-1 | (k=m+1) OR (P[0..m-k] = P[k-1..m-1] AND i+2<=k<=m) }
+ //equivalent to:
+ //d2(i) = min { k-1 | (k=m+1) OR (m-k+1 \in B_P(0) AND i+2 <= k <= m) }
+
+ //!\TODO IMPORTANT!! MUST REMOVE THE LOOP BELOW, IT'S ONLY FOR DEBUGGING PURPOSES
+ //for (std::size_t i = 0; i < substr_size_; ++i)
+ // table2[i] = substr_size_;
+
+ std::size_t boundary = failure_func[0];
+ for (std::size_t i = 0; i < substr_size_-1 && boundary; ++i)
+ {
+ while (boundary > 0 && i+1>substr_size_-boundary)
+ boundary = failure_func[substr_size_-boundary];
+ if (table2[i] > 2*substr_size_-i-1-boundary)
+ table2[i] = 2*substr_size_-i-1-boundary;
+ //if (table2[i] > substr_size_-boundary)
+ // table2[i] = substr_size_-boundary;
+ }
             }
 
             //finding in text=random access range
@@ -119,7 +267,9 @@
                     return string_range_type(start, start);
 
                 str_size = boost::end(str) - start;
- for (str_idx = substr_size_ - 1, substr_idx = substr_size_ - 1; str_idx < str_size;)
+ str_idx = substr_size_ - 1; substr_idx = substr_size_ - 1;
+
+ while (str_idx < str_size)
                 {
                     if (comp(start[str_idx],
                         boost::begin(substr)[substr_idx]))
@@ -132,23 +282,29 @@
                     }
                     else
                     {
- table1_type::const_iterator iter = table1.find(start[str_idx]);
+ table1_type::const_iterator iter = table1_find<comparator_type>(start[str_idx]);
+ std::size_t step = substr_size_ - substr_idx;
                         if (iter == table1.end())
                         {
- str_idx += substr_size_ - substr_idx;
- substr_idx = substr_size_ - 1;
+ step = substr_size_;
                         }
- //else if (iter->second > substr_idx)
- //{
- // str_idx += iter->second;
- // substr_idx = substr_size_ - 1;
- //}
- else
+ else if (substr_size_ - 1 - iter->second < substr_idx)
                         {
- assert(substr_size_ >= substr_idx);
- str_idx += substr_size_-substr_idx;
- substr_idx = substr_size_ - 1;
+ step = iter->second;
                         }
+
+ if (step < table2[substr_idx]) str_idx += table2[substr_idx];
+ else str_idx += step;
+ //start from the end of the substring again.
+ substr_idx = substr_size_ - 1;
+ //fast loop. get rid of matches that fail on the first character.
+ //remove this if it shows no improvement whatsoever. might be useful after adding table2
+ /*while (str_idx < str_size && !comp(start[str_idx],boost::begin(substr)[substr_idx]))
+ {
+ table1_type::const_iterator iter = table1_find<comparator_type>(start[str_idx]);
+ if (iter == table1.end()) str_idx += substr_size_;
+ else str_idx += iter->second;
+ }*/
                     }
                 }
                 return string_range_type(boost::end(str), boost::end(str));
@@ -162,6 +318,28 @@
                     rebind<substring_char_type>::other
> table1_type;
             table1_type table1;
+ typedef typename std::vector<std::size_t,
+ typename AllocatorT::template rebind<std::size_t>::other
+ > table2_type;
+ table2_type table2;
+
+ //Case 1: ComparatorT=boost::algorithm::is_equal
+ template <class ComparatorTT>
+ typename table1_type::iterator table1_find (string_char_type const &chr,
+ typename boost::enable_if<
+ typename boost::is_same<ComparatorTT, boost::algorithm::is_equal> >::type* = 0)
+ {
+ return table1.find(chr);
+ }
+
+ //Case 2: ComparatorT=boost::algorithm::is_iequal
+ template <class ComparatorTT>
+ typename table1_type::iterator table1_find (string_char_type const &chr,
+ typename boost::enable_if<
+ typename boost::is_same<ComparatorTT, boost::algorithm::is_iequal> >::type* = 0)
+ {
+ return table1.find(std::tolower(chr, std::locale()));
+ }
             
             //std::vector<std::pair<substring_char_type, std::size_t> > table1;
 
@@ -169,15 +347,13 @@
 
         };
     };
+ struct boyer_moore_tag { typedef boost::algorithm::boyer_moore type; };
 } }
 
 namespace boost
 {
     using boost::algorithm::boyer_moore;
- typedef boost::algorithm::finder_t<std::string, std::string,
- boost::algorithm::boyer_moore> boyer_moore_finder;
- typedef boost::algorithm::finder_t<std::wstring, std::wstring,
- boost::algorithm::boyer_moore> wboyer_moore_finder;
+ using boost::algorithm::boyer_moore_tag;
 }
 
 #endif
\ No newline at end of file

Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/knuth_morris_pratt.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/knuth_morris_pratt.hpp (original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/knuth_morris_pratt.hpp 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
@@ -16,10 +16,10 @@
     {
 
         template <class Finder,class RandomAccessRange1T,
- class RandomAccessRange2T,class ComparatorT,class AllocatorT>
+ class RandomAccessRange2T,class ComparatorT,class AllocatorT>
         class algorithm
             : public boost::algorithm::detail::finder_typedefs<
- RandomAccessRange1T,RandomAccessRange2T,ComparatorT,AllocatorT>
+ RandomAccessRange1T,RandomAccessRange2T,ComparatorT,AllocatorT>
         {
         public:
             /*typedef RandomAccessIterator1T substring_iterator_type;
@@ -53,33 +53,61 @@
                 substring_range_type const &substr = static_cast<Finder*>(this)->get_substring_range();
                 comparator_type const &comp = static_cast<Finder*>(this)->get_comparator();
 
- std::size_t idx = start - boost::begin(str),
- str_size = boost::end(str) - boost::begin(str), q = 0,
+ std::size_t str_idx = start - boost::begin(str),
+ str_size = boost::end(str) - boost::begin(str), substr_idx = 0,
                     substr_size = boost::end(substr) - boost::begin(substr);
 
                 if (boost::begin(substr) == boost::end(substr))
                     return boost::iterator_range<string_iterator_type>(
                         start, start);
 
- while (idx < str_size)
+ //substring bigger than string
+ if (substr_size > str_size-str_idx)
+ return make_iterator_range(boost::end(str), boost::end(str));
+
+ //!\todo step by step run for substr_size=1.
+
+ //while (idx < str_size)
+ std::size_t compare_against = str_size - substr_size + 1;
+ while (str_idx-substr_idx < compare_against)
                 {
- // Invariant: substring[0..q-1] == string[..idx-1]
+ // Invariant: substring[0..substr_idx-1] == string[..str_idx-1] iff substr_idx>0
                     
+ //Slide the pattern to the right until we manage to find a match for the current char
+ while (substr_idx > 0 &&
+ !comp(boost::begin(str)[str_idx],boost::begin(substr)[substr_idx]))
+ substr_idx = failure_func[substr_idx-1];
+
+ // Invariant: Either substr_idx==0 or string[str_idx]==substr[substr_idx]
+
+ while (substr_idx == 0 && str_idx < compare_against
+ && !comp(boost::begin(str)[str_idx],boost::begin(substr)[0]))
+ ++str_idx;
+
+ //Invariant: string[str_idx]==substr[substr_idx] or (str_idx=0 and str_idx >= str_size)
+ if (substr_idx == substr_size - 1 && str_idx-substr_idx < compare_against)
+ return make_iterator_range(
+ boost::begin(str)+(str_idx+1-substr_size),
+ boost::begin(str)+(str_idx+1));
+
+ ++substr_idx; ++str_idx;
+ /*
                     //if (boost::begin(substr)[q] == boost::begin(str)[idx])
- if (comp(boost::begin(substr)[q], boost::begin(str)[idx]))
+ if (comp(boost::begin(substr)[substr_idx], boost::begin(str)[str_idx]))
                     {
- ++idx; ++q;
- if (q == substr_size)
+ if (str_idx == substr_size-1)
                             return boost::iterator_range<string_iterator_type>(
                                 boost::begin(str)+(idx-substr_size),
                                 boost::begin(str)+idx
                             );
+ ++substr_idx; ++str_idx;
                     }
                     else
                     {
                         if (q == 0) ++idx;
                         else q = failure_func[q];
                     }
+ */
                 }
                 return boost::iterator_range<string_iterator_type>(
                     boost::end(str), boost::end(str));
@@ -90,10 +118,14 @@
                 substring_range_type const &substr = static_cast<Finder*>(this)->get_substring_range();
                 comparator_type const &comp = static_cast<Finder*>(this)->get_comparator();
 
- //!\todo clear the container first
+ //failure_func[i] = the size of the largest border (prefix that's also a suffix)
+ // of P[0..i], that's different from itself.
+ //failure_func[i] = q <=> P[0..q-1] = P[..m-1] (first q chars equal to last q chars)
+
+ //0 <= failure_func[i] <= i
                 failure_func.clear();
                 failure_func.reserve(boost::end(substr) - boost::begin(substr));
- std::size_t idx, q, substr_size = boost::end(substr) - boost::begin(substr);
+ /*std::size_t idx, q, substr_size = boost::end(substr) - boost::begin(substr);
                 failure_func.push_back(0); failure_func.push_back(0);
                 
                 if (boost::begin(substr) == boost::end(substr)) return;
@@ -112,20 +144,37 @@
                         else if (q == 0) { failure_func.push_back(0); break; }
                         q = failure_func[q];
                     }
+ }*/
+
+ //!\TODO write failure func
+ std::size_t i = 0, j = 1, substr_size = boost::end(substr) - boost::begin(substr);
+ failure_func.push_back(0); // failure_func[0] = 0
+ while (j < substr_size)
+ {
+ while (i > 0 && !comp(boost::begin(substr)[i], boost::begin(substr)[j]))
+ i = failure_func[i-1];
+ while (i == 0 && j < substr_size && !comp(boost::begin(substr)[0],boost::begin(substr)[j]))
+ {
+ //Invariant: i == 0 and substr[0] != substr[j], which means failure_func[j]=0
+ failure_func.push_back(0);
+ ++j;
+ }
+ //Invariant: j is out of bounds or P[i]==P[j], meaning failure_func[j]=i+1
+ if (j < substr_size) failure_func.push_back(i+1);
+ ++j; ++i;
                 }
             }
 
         };
     };
+ struct knuth_morris_pratt_tag { typedef boost::algorithm::knuth_morris_pratt type; };
 } }
 
 namespace boost
 {
     using boost::algorithm::knuth_morris_pratt;
- typedef boost::algorithm::finder_t<std::string, std::string,
- boost::algorithm::knuth_morris_pratt> knuth_morris_pratt_finder;
- typedef boost::algorithm::finder_t<std::wstring, std::wstring,
- boost::algorithm::knuth_morris_pratt> wknuth_morris_pratt_finder;
+ using boost::algorithm::knuth_morris_pratt_tag;
+
 }
 
 #endif
\ No newline at end of file

Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/naive_search.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/naive_search.hpp (original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/naive_search.hpp 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
@@ -67,16 +67,12 @@
                 };
 
         };
-
+ struct naive_search_tag { typedef boost::algorithm::naive_search type; };
 } }
         
 namespace boost
 {
     using boost::algorithm::naive_search;
- typedef boost::algorithm::finder_t<std::string, std::string,
- boost::algorithm::naive_search> naive_search_finder;
- typedef boost::algorithm::finder_t<std::wstring, std::wstring,
- boost::algorithm::naive_search> wnaive_search_finder;
 }
 
 #endif
\ No newline at end of file

Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/rabin_karp.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/rabin_karp.hpp (original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/rabin_karp.hpp 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
@@ -84,22 +84,16 @@
     //1/150167080229379589 odds of collision. useful with wchar_t
     typedef rabin_karp_algorithm<boost::uint64_t,337515847,373587883,255150899,401959183> rabin_karp64;
 
+ struct rabin_karp32_tag { typedef boost::algorithm::rabin_karp32 type; };
+ struct rabin_karp64_tag { typedef boost::algorithm::rabin_karp64 type; };
 } } // namespace algorithm, namespace boost
 
 namespace boost
 {
     using boost::algorithm::rabin_karp32;
     using boost::algorithm::rabin_karp64;
-
- typedef boost::algorithm::finder_t<std::string, std::string,
- boost::algorithm::rabin_karp32> rabin_karp32_finder;
- typedef boost::algorithm::finder_t<std::wstring, std::wstring,
- boost::algorithm::rabin_karp32> wrabin_karp32_finder;
-
- typedef boost::algorithm::finder_t<std::string, std::string,
- boost::algorithm::rabin_karp64> rabin_karp64_finder;
- typedef boost::algorithm::finder_t<std::wstring, std::wstring,
- boost::algorithm::rabin_karp64> wrabin_karp64_finder;
+ using boost::algorithm::rabin_karp32_tag;
+ using boost::algorithm::rabin_karp64_tag;
 }
 
 #endif
\ No newline at end of file

Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/suffix_array.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/suffix_array.hpp (original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/suffix_array.hpp 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
@@ -20,20 +20,15 @@
         //! parameter for LessThanComparator. Maybe we should pass two comparators, give it some thought.
         template <class Finder,class RandomAccessRange1T,
             class RandomAccessRange2T,class ComparatorT,class AllocatorT>
- class algorithm
+ class algorithm;
+ template <class Finder, class RandomAccessRange1T,
+ class RandomAccessRange2T, class AllocatorT>
+ class algorithm<Finder,RandomAccessRange1T,RandomAccessRange2T,
+ boost::algorithm::is_equal,AllocatorT>
             : public boost::algorithm::detail::finder_typedefs<
- RandomAccessRange1T,RandomAccessRange2T,ComparatorT,AllocatorT>
+ RandomAccessRange1T,RandomAccessRange2T,boost::algorithm::is_equal,AllocatorT>
         {
         public:
- /*typedef RandomAccessIterator1T substring_iterator_type;
- typedef RandomAccessIterator2T string_iterator_type;
- typedef typename
- std::iterator_traits<substring_iterator_type>::value_type substring_char_type;
- typedef typename std::iterator_traits<string_iterator_type>::value_type string_char_type;
- typedef typename boost::iterator_range<substring_iterator_type> substring_range_type;
- typedef typename boost::iterator_range<string_iterator_type> string_range_type;
- typedef Comparator comparator_type;
- typedef Allocator allocator_type;*/
             std::string get_algorithm_name () const { return "Suffix array"; }
         protected:
             algorithm () : found_matches_(false), pos_(), matches_() { }
@@ -214,15 +209,13 @@
 
         };
     };
+ struct suffix_array_search_tag { typedef boost::algorithm::suffix_array_search type; };
 } }
 
 namespace boost
 {
     using boost::algorithm::suffix_array_search;
- typedef boost::algorithm::finder_t<std::string, std::string,
- boost::algorithm::suffix_array_search> suffix_array_finder;
- typedef boost::algorithm::finder_t<std::wstring, std::wstring,
- boost::algorithm::suffix_array_search> wsuffix_array_finder;
+ using boost::algorithm::suffix_array_search_tag;
 }
 
 #endif
\ No newline at end of file

Modified: sandbox/SOC/2010/stringalgos/libs/algorithm/string/benchmark/string_search.cpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/libs/algorithm/string/benchmark/string_search.cpp (original)
+++ sandbox/SOC/2010/stringalgos/libs/algorithm/string/benchmark/string_search.cpp 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
@@ -24,6 +24,7 @@
 
 #include <iostream>
 #include <string>
+#include <vector>
 
 
 using namespace std;
@@ -136,6 +137,7 @@
 //#define BOOST_MPL_LIMIT_METAFUNCTION_ARITY 10
 #include <boost/algorithm/string/benchmark_finder.hpp>
 
+#include <boost/algorithm/string/find.hpp>
 #include <boost/algorithm/string/finder.hpp>
 #include <boost/algorithm/string/string_search.hpp>
 
@@ -171,11 +173,16 @@
 #include <boost/throw_exception.hpp>
 
 #include <boost/format.hpp>
+#include <ctime>
 
+//replace to a constant to compare multiple benchmarks
+unsigned int get_seed() { return 123; }
 
 unsigned int mrand (unsigned int min, unsigned int max)
 {
     static boost::mt19937 rng;
+ static bool init = false;
+ if (!init) { init = true; rng.seed(static_cast<unsigned int>(get_seed())); }
     assert(min <= max);
     boost::uniform_int<> distrib(min, max);
     boost::variate_generator<boost::mt19937&, boost::uniform_int<> > gen(rng, distrib);
@@ -195,7 +202,7 @@
 std::string rand_substr(std::string const &str)
 {
     unsigned int size = mrand(1,str.size());
- unsigned int offset = mrand(0, str.size()-size-1);
+ unsigned int offset = mrand(0, str.size()-size);
     return std::string(str.begin()+offset, str.begin()+offset+size);
 }
 
@@ -219,8 +226,8 @@
 char find_most_frequent(std::string const &str)
 {
     static const unsigned int char_range_size =
- std::numeric_limits<char>::max() -
- std::numeric_limits<char>::min() + 1;
+ (std::numeric_limits<char>::max)() -
+ (std::numeric_limits<char>::min)() + 1;
     std::vector<unsigned int> freq(char_range_size);
     for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
     {
@@ -228,12 +235,12 @@
         //++freq[*it - std::numeric_limits<char>::min()];
         try
         {
- ++freq.at((int)*it - std::numeric_limits<char>::min());
+ ++freq.at((int)*it - (std::numeric_limits<char>::min)());
         }
         catch (std::exception const &)
         {
                 BOOST_THROW_EXCEPTION(std::runtime_error(
- (boost::format("Tried to increment index %1% of the vector") % ((int)*it - std::numeric_limits<char>::min())).str()
+ (boost::format("Tried to increment index %1% of the vector") % ((int)*it - (std::numeric_limits<char>::min)())).str()
                 ));
         }
         
@@ -241,9 +248,18 @@
     unsigned int max_idx = 0, max = freq[0];
     for (unsigned int i = 1; i < char_range_size; ++i)
         if (freq[i] > max) { max = freq[i]; max_idx = i; }
- return max_idx + std::numeric_limits<char>::min();
+ return max_idx + (std::numeric_limits<char>::min)();
 }
 
+unsigned int count_different_chars(std::string const &str, std::string const &str2)
+{
+ assert(str.size() == str2.size());
+ std::string::size_type size = str.size();
+ unsigned int ret = 0;
+ for (unsigned int i = 0; i < size; ++i)
+ if (str[i] != str2[i]) ++ret;
+ return ret;
+}
 
 void unit_test()
 {
@@ -252,6 +268,22 @@
     assert(find_most_frequent("aaabbccc") != 'b');
     assert(find_most_frequent("aaaaaaaaaaa") == 'a');
     assert(find_most_frequent("abababb") == 'b');
+ std::string str( rand_str(10000));
+ for (unsigned int i = 0; i < 100; ++i)
+ {
+ std::string substr( rand_substr(str) );
+ std::string::difference_type offset = str.find(substr);
+ std::string::iterator begin = boost::begin( boost::algorithm::find_first(str, substr) );
+ assert(begin == str.begin()+offset && begin != str.end());
+ }
+ for (unsigned int i = 0; i < 100; ++i)
+ {
+ std::string str2 = rand_str(10);
+ std::string obfuscate_beginning = rand_obfuscate_beginning(str2),
+ obfuscate_end = rand_obfuscate_end(str2);
+ assert(count_different_chars(str2,obfuscate_beginning)<=1);
+ assert(count_different_chars(str2,obfuscate_end)<=1);
+ }
 }
 
 int main ()
@@ -272,14 +304,17 @@
             boost::knuth_morris_pratt,
             boost::boyer_moore,
             //boost::suffix_array_search,
- boost::rabin_karp32/*,
- boost::rabin_karp64*/
+ boost::rabin_karp32//,
+ //boost::rabin_karp64
>,
        boost::is_equal> b;
 
+ unit_test();
+
     b.set_substring(&substr); b.set_string(&str);
     std::cout << "Doing almost no work:" << std::endl;
- for (unsigned int i=0; i<5000; ++i) b.find_first();
+ //!\todo uncomment
+ //for (unsigned int i=0; i<5000; ++i) b.find_first();
     b.output_stats(std::cout);
 
     b.clear();
@@ -288,21 +323,22 @@
 
     std::vector<std::string> benchmark_files = list_of
         ("dblp.xml.200MB") ("dna.200MB") ("english.200MB") ("pitches.50MB")
- ("proteins.200MB") ("sources.200MB") ("random1.50MB");
+ ("proteins.200MB") ("sources.200MB") ("random1.50MB") ("binary.50MB");
     boost::for_each(benchmark_files, arg1 = benchmark_path + "/" + arg1);
 
- unit_test();
 
- //Category 1: The text
+ //Category 1: The substring and string change every iteration
     // Test 1: Against long nonmatching substring
     // Test 2: Against almost matching substrings (differing in ending only)
     // Test 3: Against almost matching substrings (differing in beginning only)
     // Test 4: Against matching substrings
     // Test 5: Against the most frequent character
+ //Category 2: The string stays constant, the substrings change
+ //Category 3: The substring changes, the string stays constant
     bool substr_change = false;
     try {
- //!\todo move back to starting at step 1
- for (unsigned int test = 1; test <= 5; ++test)
+ //Category 1
+ for (unsigned int test = 0; test <= 5; ++test)
         {
 
             BOOST_FOREACH(std::string fn, benchmark_files)
@@ -317,11 +353,13 @@
                 std::ifstream bf(fn, std::ios::binary);
                 if (!bf) { std::cerr << "Error opening " << fn << std::endl; return -1; }
 
- std::vector<char> buf(1<<20);
- while (bf.read(&buf[0], mrand(1000,1000000)))
+ const unsigned int MAX_STR_SIZE = 10*1024*1024;//10;//1000000;
+ const unsigned int MIN_STR_SIZE = 10*1024;//1000;
+ std::vector<char> buf(MAX_STR_SIZE+32);
+ while (bf.read(&buf[0], mrand(MIN_STR_SIZE,MAX_STR_SIZE)))
                 {
- //Note: Only reading the first 10MB of the file!!
- if (bf.tellg() > 10*1024*1024) break;
+ //Note: Only reading the first x MB of the file!!
+ //if (bf.tellg() > 30*1024*1024) break;
 
                     try {
                         assert(bf.gcount() <= buf.size());
@@ -356,11 +394,38 @@
                         if (substr_change) b.set_substring(&substr);
                         b.set_string(&str);
                     } catch (std::exception const &e) { BOOST_THROW_EXCEPTION(e); }
- while (boost::distance(b.find_next()));
+ //std::cerr << '+';
+ while (boost::distance(b.find_next())); //std::cerr << "+";
+ //std::cerr << "+";
                 } // end while (read)
                 b.output_stats(std::cout);
             } // end foreach file
         } // end for(test)
+
+ //Category 2
+ std::vector<std::string> str_list;
+
+ std::string current_str;
+ std::string repeated_substr = rand_str(1000),
+ repeated_substr2 = rand_obfuscate_beginning(repeated_substr),
+ repeated_substr3 = rand_obfuscate_end(repeated_substr);
+ for (unsigned int i=0; i<5000; ++i) current_str += repeated_substr + rand_str(10);
+ str_list.push_back(current_str); current_str.clear();
+ for (unsigned int i=0; i<5000; ++i) current_str += repeated_substr2 + rand_str(10) + repeated_substr;
+ str_list.push_back(current_str); current_str.clear();
+ for (unsigned int i=0; i<5000; ++i) current_str += repeated_substr3 + rand_str(10) + repeated_substr;
+ str_list.push_back(current_str);
+
+ b.set_substring(&repeated_substr);
+ for (unsigned int str_id = 0; str_id < str_list.size(); ++str_id)
+ //for (unsigned int str_id = 0; str_id < 1; ++str_id)
+ {
+ b.clear();
+ std::cout << "Testing against string #" << str_id << ':' << std::endl;
+ b.set_string(&str_list[str_id]);
+ while (boost::distance(b.find_next()));
+ b.output_stats(std::cout);
+ }
     } // end try
     catch (boost::exception const &e)
     {

Modified: sandbox/SOC/2010/stringalgos/libs/algorithm/string/doc/Jamfile.v2
==============================================================================
--- sandbox/SOC/2010/stringalgos/libs/algorithm/string/doc/Jamfile.v2 (original)
+++ sandbox/SOC/2010/stringalgos/libs/algorithm/string/doc/Jamfile.v2 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
@@ -31,6 +31,15 @@
     [ glob ../../../../boost/algorithm/string/case_conv.hpp ]
     [ glob ../../../../boost/algorithm/string/find.hpp ]
     [ glob ../../../../boost/algorithm/string/finder.hpp ]
+ [ glob ../../../../boost/algorithm/string/finder_aliases.hpp ]
+ [ glob ../../../../boost/algorithm/string/benchmark_finder.hpp ]
+ [ glob ../../../../boost/algorithm/string/string_search.hpp ]
+ [ glob ../../../../boost/algorithm/string/string_search/boyer_moore.hpp ]
+ [ glob ../../../../boost/algorithm/string/string_search/knuth_morris_pratt.hpp ]
+ [ glob ../../../../boost/algorithm/string/string_search/naive_search.hpp ]
+ [ glob ../../../../boost/algorithm/string/string_search/rabin_karp.hpp ]
+ [ glob ../../../../boost/algorithm/string/string_search/suffix_array.hpp ]
+
     [ glob ../../../../boost/algorithm/string/find_iterator.hpp ]
     [ glob ../../../../boost/algorithm/string/trim.hpp ]
     [ glob ../../../../boost/algorithm/string/predicate.hpp ]

Modified: sandbox/SOC/2010/stringalgos/libs/algorithm/string/doc/usage.xml
==============================================================================
--- sandbox/SOC/2010/stringalgos/libs/algorithm/string/doc/usage.xml (original)
+++ sandbox/SOC/2010/stringalgos/libs/algorithm/string/doc/usage.xml 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
@@ -1,28 +1,19 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE library PUBLIC "-//Boost//DTD BoostBook XML V1.0//EN"
-"http://www.boost.org/tools/boostbook/dtd/boostbook.dtd">
-
-
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE library PUBLIC "-//Boost//DTD BoostBook XML V1.0//EN" "http://www.boost.org/tools/boostbook/dtd/boostbook.dtd"[]>
 <!-- Copyright (c) 2002-2006 Pavol Droba.
      Subject to the Boost Software License, Version 1.0.
      (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 -->
-
-
 <section id="string_algo.usage" last-revision="$Date$">
- <title>Usage</title>
-
- <using-namespace name="boost"/>
- <using-namespace name="boost::algorithm"/>
-
-
- <section>
- <title>First Example</title>
-
- <para>
+ <title>Usage</title>
+ <using-namespace name="boost" />
+ <using-namespace name="boost::algorithm" />
+ <section>
+ <title>First Example</title>
+ <para>
             Using the algorithms is straightforward. Let us have a look at the first example:
         </para>
- <programlisting>
+ <programlisting>
     #include &lt;boost/algorithm/string.hpp&gt;
     using namespace std;
     using namespace boost;
@@ -38,14 +29,15 @@
           ireplace_first_copy(
              str1,"hello","goodbye")); // str2 == "goodbye world!"
         </programlisting>
- <para>
+ <para>
             This example converts str1 to upper case and trims spaces from the start and the end
             of the string. str2 is then created as a copy of str1 with "hello" replaced with "goodbye".
             This example demonstrates several important concepts used in the library:
         </para>
- <itemizedlist>
- <listitem>
- <para><emphasis role="bold">Container parameters:</emphasis>
+ <itemizedlist>
+ <listitem>
+ <para>
+ <emphasis role="bold">Container parameters:</emphasis>
                     Unlike in the STL algorithms, parameters are not specified only in the form
                     of iterators. The STL convention allows for great flexibility,
                     but it has several limitations. It is not possible to <emphasis>stack</emphasis> algorithms together,
@@ -53,32 +45,35 @@
                     a return value from another algorithm. It is considerably easier to write
                     <code>to_lower(str1)</code>, than <code>to_lower(str1.begin(), str1.end())</code>.
                 </para>
- <para>
+ <para>
                     The magic of <ulink url="../../libs/range/index.html">Boost.Range</ulink>
                     provides a uniform way of handling different string types.
                     If there is a need to pass a pair of iterators,
                     <ulink url="../../libs/range/doc/html/range/utilities/iterator_range.html"><code>boost::iterator_range</code></ulink>
                     can be used to package iterators into a structure with a compatible interface.
                 </para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Copy vs. Mutable:</emphasis>
+ </listitem>
+ <listitem>
+ <para>
+ <emphasis role="bold">Copy vs. Mutable:</emphasis>
                     Many algorithms in the library are performing a transformation of the input.
                     The transformation can be done in-place, mutating the input sequence, or a copy
                     of the transformed input can be created, leaving the input intact. None of
                     these possibilities is superior to the other one and both have different
                     advantages and disadvantages. For this reason, both are provided with the library.
                 </para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Algorithm stacking:</emphasis>
+ </listitem>
+ <listitem>
+ <para>
+ <emphasis role="bold">Algorithm stacking:</emphasis>
                     Copy versions return a transformed input as a result, thus allow a simple chaining of
                     transformations within one expression (i.e. one can write <code>trim_copy(to_upper_copy(s))</code>).
                     Mutable versions have <code>void</code> return, to avoid misuse.
                 </para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Naming:</emphasis>
+ </listitem>
+ <listitem>
+ <para>
+ <emphasis role="bold">Naming:</emphasis>
                     Naming follows the conventions from the Standard C++ Library. If there is a
                     copy and a mutable version of the same algorithm, the mutable version has no suffix
                     and the copy version has the suffix <emphasis>_copy</emphasis>.
@@ -86,39 +81,38 @@
                     (e.g. <functionname>ifind_first()</functionname>).
                     This prefix identifies that the algorithm works in a case-insensitive manner.
                 </para>
- </listitem>
- </itemizedlist>
- <para>
+ </listitem>
+ </itemizedlist>
+ <para>
             To use the library, include the <headername>boost/algorithm/string.hpp</headername> header.
             If the regex related functions are needed, include the
             <headername>boost/algorithm/string_regex.hpp</headername> header.
         </para>
- </section>
- <section>
- <title>Case conversion</title>
-
- <para>
+ </section>
+ <section>
+ <title>Case conversion</title>
+ <para>
             STL has a nice way of converting character case. Unfortunately, it works only
             for a single character and we want to convert a string,
         </para>
- <programlisting>
+ <programlisting>
     string str1("HeLlO WoRld!");
     to_upper(str1); // str1=="HELLO WORLD!"
         </programlisting>
- <para>
- <functionname>to_upper()</functionname> and <functionname>to_lower()</functionname> convert the case of
+ <para>
+ <functionname>to_upper()</functionname> and <functionname>to_lower()</functionname> convert the case of
             characters in a string using a specified locale.
         </para>
- <para>
+ <para>
             For more information see the reference for <headername>boost/algorithm/string/case_conv.hpp</headername>.
         </para>
- </section>
- <section>
- <title>Predicates and Classification</title>
- <para>
+ </section>
+ <section>
+ <title>Predicates and Classification</title>
+ <para>
             A part of the library deals with string related predicates. Consider this example:
         </para>
- <programlisting>
+ <programlisting>
     bool is_executable( string&amp; filename )
     {
         return
@@ -142,31 +136,29 @@
         &lt;&lt; " written in the lower case"
         &lt;&lt; endl; // prints "hello world! is written in the lower case"
         </programlisting>
- <para>
+ <para>
             The predicates determine whether if a substring is contained in the input string
             under various conditions. The conditions are: a string starts with the substring,
             ends with the substring,
             simply contains the substring or if both strings are equal. See the reference for
             <headername>boost/algorithm/string/predicate.hpp</headername> for more details.
         </para>
- <para>
+ <para>
             In addition the algorithm <functionname>all()</functionname> checks
             all elements of a container to satisfy a condition specified by a predicate.
             This predicate can be any unary predicate, but the library provides a bunch of
             useful string-related predicates and combinators ready for use.
             These are located in the <headername>boost/algorithm/string/classification.hpp</headername> header.
             Classification predicates can be combined using logical combinators to form
- a more complex expressions. For example: <code>is_from_range('a','z') || is_digit()</code>
- </para>
- </section>
- <section>
- <title>Trimming</title>
-
- <para>
+ a more complex expressions. For example: <code>is_from_range('a','z') || is_digit()</code></para>
+ </section>
+ <section>
+ <title>Trimming</title>
+ <para>
             When parsing the input from a user, strings usually have unwanted leading or trailing
             characters. To get rid of them, we need trim functions:
         </para>
- <programlisting>
+ <programlisting>
     string str1=" hello world! ";
     string str2=trim_left_copy(str1); // str2 == "hello world! "
     string str3=trim_right_copy(str1); // str3 == " hello world!"
@@ -176,7 +168,7 @@
     // remove leading 0 from the phone number
     trim_left_if(phone,is_any_of("0")); // phone == "423333444"
         </programlisting>
- <para>
+ <para>
             It is possible to trim the spaces on the right, on the left or on both sides of a string.
             And for those cases when there is a need to remove something else than blank space, there
             are <emphasis>_if</emphasis> variants. Using these, a user can specify a functor which will
@@ -184,14 +176,13 @@
             predicates like <functionname>is_digit()</functionname> mentioned in the previous paragraph.
             See the reference for the <headername>boost/algorithm/string/trim.hpp</headername>.
         </para>
- </section>
- <section>
- <title>Find algorithms</title>
-
- <para>
+ </section>
+ <section>
+ <title>Find algorithms</title>
+ <para>
             The library contains a set of find algorithms. Here is an example:
         </para>
- <programlisting>
+ <programlisting>
     char text[]="hello dolly!";
     iterator_range&lt;char*&gt; result=find_last(text,"ll");
 
@@ -206,7 +197,7 @@
         cout &lt;&lt; "Dolly is there" &lt;&lt; endl;
     }
         </programlisting>
- <para>
+ <para>
             We have used <functionname>find_last()</functionname> to search the <code>text</code> for "ll".
             The result is given in the <ulink url="../../libs/range/doc/html/range/utilities/iterator_range.html"><code>boost::iterator_range</code></ulink>.
             This range delimits the
@@ -221,25 +212,25 @@
             <code>begin()</code> and <code>end()</code> methods, so it can be used like any other STL container.
             Also it is convertible to bool therefore it is easy to use find algorithms for a simple containment checking.
         </para>
- <para>
+ <para>
             Find algorithms are located in <headername>boost/algorithm/string/find.hpp</headername>.
         </para>
- </section>
- <section>
- <title>Replace Algorithms</title>
- <para>
+ </section>
+ <section>
+ <title>Replace Algorithms</title>
+ <para>
             Find algorithms can be used for searching for a specific part of string. Replace goes one step
             further. After a matching part is found, it is substituted with something else. The substitution is computed
             from the original, using some transformation.
         </para>
- <programlisting>
+ <programlisting>
     string str1="Hello Dolly, Hello World!"
     replace_first(str1, "Dolly", "Jane"); // str1 == "Hello Jane, Hello World!"
     replace_last(str1, "Hello", "Goodbye"); // str1 == "Hello Jane, Goodbye World!"
     erase_all(str1, " "); // str1 == "HelloJane,GoodbyeWorld!"
     erase_head(str1, 6); // str1 == "Jane,GoodbyeWorld!"
         </programlisting>
- <para>
+ <para>
             For the complete list of replace and erase functions see the
             <link linkend="string_algo.reference">reference</link>.
             There is a lot of predefined function for common usage, however, the library allows you to
@@ -251,15 +242,14 @@
             takes the result of the Finder (usually a reference to the found substring) and creates a
             substitute for it. Replace algorithm puts these two together and makes the desired substitution.
         </para>
- <para>
+ <para>
             Check <headername>boost/algorithm/string/replace.hpp</headername>, <headername>boost/algorithm/string/erase.hpp</headername> and
             <headername>boost/algorithm/string/find_format.hpp</headername> for reference.
         </para>
- </section>
- <section>
- <title>Find Iterator</title>
-
- <para>
+ </section>
+ <section>
+ <title>Find Iterator</title>
+ <para>
             An extension to find algorithms it the Find Iterator. Instead of searching for just a one part of a string,
             the find iterator allows us to iterate over the substrings matching the specified criteria.
             This facility is using the <link linkend="string_algo.finder_concept">Finder</link> to incrementally
@@ -267,12 +257,12 @@
             Dereferencing a find iterator yields an <ulink url="../../libs/range/doc/html/range/utilities/iterator_range.html"><code>boost::iterator_range</code></ulink>
             object, that delimits the current match.
         </para>
- <para>
+ <para>
             There are two iterators provided <classname>find_iterator</classname> and
             <classname>split_iterator</classname>. The former iterates over substrings that are found using the specified
             Finder. The latter iterates over the gaps between these substrings.
         </para>
- <programlisting>
+ <programlisting>
     string str1("abc-*-ABC-*-aBc");
     // Find all 'abc' substrings (ignoring the case)
     // Create a find_iterator
@@ -304,31 +294,29 @@
     // ABC
     // aBC
         </programlisting>
- <para>
+ <para>
             Note that the find iterators have only one template parameter. It is the base iterator type.
             The Finder is specified at runtime. This allows us to typedef a find iterator for
             common string types and reuse it. Additionally make_*_iterator functions help
             to construct a find iterator for a particular range.
         </para>
- <para>
+ <para>
             See the reference in <headername>boost/algorithm/string/find_iterator.hpp</headername>.
         </para>
- </section>
- <section>
- <title>Split</title>
-
- <para>
+ </section>
+ <section>
+ <title>Split</title>
+ <para>
             Split algorithms are an extension to the find iterator for one common usage scenario.
             These algorithms use a find iterator and store all matches into the provided
             container. This container must be able to hold copies (e.g. <code>std::string</code>) or
             references (e.g. <code>iterator_range</code>) of the extracted substrings.
         </para>
- <para>
+ <para>
             Two algorithms are provided. <functionname>find_all()</functionname> finds all copies
             of a string in the input. <functionname>split()</functionname> splits the input into parts.
         </para>
-
- <programlisting>
+ <programlisting>
     string str1("hello abc-*-ABC-*-aBc goodbye");
 
     typedef vector&lt; iterator_range&lt;string::iterator&gt; &gt; find_vector_type;
@@ -341,21 +329,26 @@
     split_vector_type SplitVec; // #2: Search for tokens
     split( SplitVec, str1, is_any_of("-*"), token_compress_on ); // SplitVec == { "hello abc","ABC","aBc goodbye" }
         </programlisting>
- <para>
- <code>[hello]</code> designates an <code>iterator_range</code> delimiting this substring.
+ <para>
+ <code>[hello]</code> designates an <code>iterator_range</code> delimiting this substring.
         </para>
- <para>
+ <para>
             First example show how to construct a container to hold references to all extracted
             substrings. Algorithm <functionname>ifind_all()</functionname> puts into FindVec references
             to all substrings that are in case-insensitive manner equal to "abc".
         </para>
- <para>
+ <para>
             Second example uses <functionname>split()</functionname> to split string str1 into parts
             separated by characters '-' or '*'. These parts are then put into the SplitVec.
             It is possible to specify if adjacent separators are concatenated or not.
         </para>
- <para>
+ <para>
             More information can be found in the reference: <headername>boost/algorithm/string/split.hpp</headername>.
         </para>
- </section>
-</section>
+ </section>
+ <section>
+ <title>Finders</title>
+ <para>Finders offer a greater degree of control over what string search algorithms you would like to use for finding.</para>
+ <para>There are two finder types,<classname>finder</classname>and<classname>simplified_finder</classname>, as well as some finder generators:<functionname>first_finder</functionname>,<functionname>last_finder</functionname>,<functionname>nth_finder</functionname>,</para>
+ </section>
+</section>
\ No newline at end of file

Modified: sandbox/SOC/2010/stringalgos/libs/algorithm/string/example/finder_example.cpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/libs/algorithm/string/example/finder_example.cpp (original)
+++ sandbox/SOC/2010/stringalgos/libs/algorithm/string/example/finder_example.cpp 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
@@ -5,6 +5,7 @@
 #include <boost/algorithm/string/string_search.hpp>
 #include <boost/algorithm/string/finder.hpp>
 #include <boost/algorithm/string/case_conv.hpp>
+#include <boost/algorithm/string/finder_aliases.hpp>
 
 #include <boost/range/algorithm/copy.hpp>
 

Modified: sandbox/SOC/2010/stringalgos/libs/algorithm/string/test/finder_test.cpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/libs/algorithm/string/test/finder_test.cpp (original)
+++ sandbox/SOC/2010/stringalgos/libs/algorithm/string/test/finder_test.cpp 2010-08-07 18:38:24 EDT (Sat, 07 Aug 2010)
@@ -292,6 +292,8 @@
     BOOST_CHECK_EQUAL(boost::distance(f.find_first()), 44);
     BOOST_CHECK_EQUAL(boost::distance(f.find_next()), 0);
 
+ //!\todo remove the useless checkpoints 'n stuff
+
     // (0, 1) (1, 2) (2, 3) (3, 4) (4, 5) (5, 6) (6, 7) (7, 8) (8, 9) (9, 10) (10, 11) (11, 12) (12, 13) (13, 14) (14, 15) (15, 16) (16, 17)
     assign_literal(substr,
         "x",1);
@@ -588,13 +590,244 @@
         BOOST_CHECK_EQUAL(f.find_next_index(), i);
 }
 
+void find_test()
+{
+ string str1("123abcxXxabcXxXabc321");
+ string str2("abc");
+ string str3("");
+ const char* pch1="123abcxxxabcXXXabc321";
+ vector<int> vec1( str1.begin(), str1.end() );
+
+ // find results ------------------------------------------------------------//
+ iterator_range<string::iterator> nc_result;
+ iterator_range<string::const_iterator> cv_result;
+
+ iterator_range<vector<int>::iterator> nc_vresult;
+ iterator_range<vector<int>::const_iterator> cv_vresult;
+
+ iterator_range<const char*> ch_result;
+
+ // basic tests ------------------------------------------------------------//
+
+
+ // find_first
+ BOOST_CHECKPOINT( "find_first" );
+
+ nc_result=find_first( str1, string("abc") );
+ BOOST_CHECK(nc_result.begin()-str1.begin() == 3);
+ BOOST_CHECK(nc_result.end()-str1.begin() == 6);
+
+ cv_result=find_first( const_cast<const string&>(str1), str2 );
+ BOOST_CHECK(
+ ( (cv_result.begin()-str1.begin()) == 3) &&
+ ( (cv_result.end()-str1.begin()) == 6) );
+
+ cv_result=ifind_first( const_cast<const string&>(str1), "xXX" );
+ BOOST_CHECK(
+ ( (cv_result.begin()-str1.begin()) == 6) &&
+ ( (cv_result.end()-str1.begin()) == 9) );
+
+ ch_result=find_first( pch1, "abc" );
+ BOOST_CHECK(( (ch_result.begin() - pch1 ) == 3) && ( (ch_result.end() - pch1 ) == 6 ) );
+
+
+ //!\todo uncomment after finishing impl
+ /*
+ // find_last
+ BOOST_CHECKPOINT( "find_last" );
+
+ nc_result=find_last( str1, string("abc") );
+ BOOST_CHECK(
+ ( (nc_result.begin()-str1.begin()) == 15) &&
+ ( (nc_result.end()-str1.begin()) == 18) );
+
+ cv_result=find_last( const_cast<const string&>(str1), str2 );
+ BOOST_CHECK(
+ ( (cv_result.begin()-str1.begin()) == 15) &&
+ ( (cv_result.end()-str1.begin()) == 18) );
+
+ cv_result=ifind_last( const_cast<const string&>(str1), "XXx" );
+ BOOST_CHECK(
+ ( (cv_result.begin()-str1.begin()) == 12) &&
+ ( (cv_result.end()-str1.begin()) == 15) );
+
+ ch_result=find_last( pch1, "abc" );
+ BOOST_CHECK(( (ch_result.begin() - pch1 ) == 15) && ( (ch_result.end() - pch1 ) == 18 ) );
+
+ // find_nth
+ BOOST_CHECKPOINT( "find_nth" );
+
+ nc_result=find_nth( str1, string("abc"), 1 );
+ BOOST_CHECK(
+ ( (nc_result.begin()-str1.begin()) == 9) &&
+ ( (nc_result.end()-str1.begin()) == 12) );
+
+ nc_result=find_nth( str1, string("abc"), -1 );
+ BOOST_CHECK(
+ ( (nc_result.begin()-str1.begin()) == 15) &&
+ ( (nc_result.end()-str1.begin()) == 18) );
+
+
+ cv_result=find_nth( const_cast<const string&>(str1), str2, 1 );
+ BOOST_CHECK(
+ ( (cv_result.begin()-str1.begin()) == 9) &&
+ ( (cv_result.end()-str1.begin()) == 12) );
+
+ cv_result=find_nth( const_cast<const string&>(str1), str2, -1 );
+ BOOST_CHECK(
+ ( (cv_result.begin()-str1.begin()) == 15) &&
+ ( (cv_result.end()-str1.begin()) == 18) );
+
+ cv_result=ifind_nth( const_cast<const string&>(str1), "xxx", 1 );
+ BOOST_CHECK(
+ ( (cv_result.begin()-str1.begin()) == 12) &&
+ ( (cv_result.end()-str1.begin()) == 15) );
+
+ cv_result=ifind_nth( const_cast<const string&>(str1), "xxx", 1 );
+ BOOST_CHECK(
+ ( (cv_result.begin()-str1.begin()) == 12) &&
+ ( (cv_result.end()-str1.begin()) == 15) );
+
+
+ ch_result=find_nth( pch1, "abc", 1 );
+ BOOST_CHECK(( (ch_result.begin() - pch1 ) == 9) && ( (ch_result.end() - pch1 ) == 12 ) );
+ */
+
+ // find_head
+ BOOST_CHECKPOINT( "find_head" );
+
+ nc_result=find_head( str1, 6 );
+ BOOST_CHECK(
+ ( (nc_result.begin()-str1.begin()) == 0) &&
+ ( (nc_result.end()-str1.begin()) == 6) );
+
+ nc_result=find_head( str1, -6 );
+ BOOST_CHECK(
+ ( (nc_result.begin()-str1.begin()) == 0) &&
+ ( (str1.end()-nc_result.end()) == 6 ) );
+
+ cv_result=find_head( const_cast<const string&>(str1), 6 );
+ BOOST_CHECK(
+ ( (cv_result.begin()-str1.begin()) == 0) &&
+ ( (cv_result.end()-str1.begin()) == 6) );
+
+ ch_result=find_head( pch1, 6 );
+ BOOST_CHECK( ( (ch_result.begin() - pch1 ) == 0 ) && ( (ch_result.end() - pch1 ) == 6 ) );
+
+ // find_tail
+ BOOST_CHECKPOINT( "find_tail" );
+
+ nc_result=find_tail( str1, 6 );
+ BOOST_CHECK(
+ ( (nc_result.begin()-str1.begin()) == 15) &&
+ ( (nc_result.end()-str1.begin()) == 21) );
+
+ nc_result=find_tail( str1, -6 );
+ BOOST_CHECK(
+ ( (nc_result.begin()-str1.begin()) == 6) &&
+ ( (nc_result.end()-str1.begin()) == 21) );
+
+
+ cv_result=find_tail( const_cast<const string&>(str1), 6 );
+ BOOST_CHECK(
+ ( (cv_result.begin()-str1.begin()) == 15) &&
+ ( (cv_result.end()-str1.begin()) == 21) );
+
+ ch_result=find_tail( pch1, 6 );
+ BOOST_CHECK( ( (ch_result.begin() - pch1 ) == 15 ) && ( (ch_result.end() - pch1 ) == 21 ) );
+
+ // find_token
+ BOOST_CHECKPOINT( "find_token" );
+
+ nc_result=find_token( str1, is_any_of("abc"), token_compress_on );
+ BOOST_CHECK(
+ ( (nc_result.begin()-str1.begin()) == 3) &&
+ ( (nc_result.end()-str1.begin()) == 6) );
+
+ cv_result=find_token( const_cast<const string&>(str1), is_any_of("abc"), token_compress_on );
+ BOOST_CHECK(
+ ( (cv_result.begin()-str1.begin()) == 3) &&
+ ( (cv_result.end()-str1.begin()) == 6) );
+
+ nc_result=find_token( str1, is_any_of("abc"), token_compress_off );
+ BOOST_CHECK(
+ ( (nc_result.begin()-str1.begin()) == 3) &&
+ ( (nc_result.end()-str1.begin()) == 4) );
+
+ cv_result=find_token( const_cast<const string&>(str1), is_any_of("abc"), token_compress_off );
+ BOOST_CHECK(
+ ( (cv_result.begin()-str1.begin()) == 3) &&
+ ( (cv_result.end()-str1.begin()) == 4) );
+
+ ch_result=find_token( pch1, is_any_of("abc"), token_compress_off );
+ BOOST_CHECK( ( (ch_result.begin() - pch1 ) == 3 ) && ( (ch_result.end() - pch1 ) == 4 ) );
+
+ // generic find
+ BOOST_CHECKPOINT( "generic find" );
+ /*
+ nc_result=find(str1, first_finder(string("abc")));
+ BOOST_CHECK(
+ ( (nc_result.begin()-str1.begin()) == 3) &&
+ ( (nc_result.end()-str1.begin()) == 6) );
+
+ cv_result=find(const_cast<const string&>(str1), first_finder(str2) );
+ BOOST_CHECK(
+ ( (cv_result.begin()-str1.begin()) == 3) &&
+ ( (cv_result.end()-str1.begin()) == 6) );
+ */
+ // multi-type comparison test
+ BOOST_CHECKPOINT( "multi-type" );
+
+ //!\todo fix this in find_test.cpp. find why teh fuck did these tests use to pass.
+ nc_vresult=find_first( vec1, string("abc") );
+ BOOST_CHECK(nc_vresult.begin()-vec1.begin() == 3);
+ BOOST_CHECK(nc_vresult.end()-vec1.begin() == 6);
+
+ cv_vresult=find_first( const_cast<const vector<int>&>(vec1), str2 );
+ BOOST_CHECK(
+ ( (cv_vresult.begin()-vec1.begin()) == 3) &&
+ ( (cv_vresult.end()-vec1.begin()) == 6) );
+
+ // overflow test
+ BOOST_CHECKPOINT( "overflow" );
+
+ nc_result=find_first( str2, string("abcd") );
+ BOOST_CHECK( nc_result.begin()==nc_result.end() );
+ cv_result=find_first( const_cast<const string&>(str2), string("abcd") );
+ BOOST_CHECK( cv_result.begin()==cv_result.end() );
+
+ cv_result=find_head( const_cast<const string&>(str2), 4 );
+ BOOST_CHECK( string( cv_result.begin(), cv_result.end() )== string("abc") );
+ cv_result=find_tail( const_cast<const string&>(str2), 4 );
+ BOOST_CHECK( string( cv_result.begin(), cv_result.end() )== string("abc") );
+
+ // Empty string test
+ BOOST_CHECKPOINT( "empty" );
+
+ nc_result=find_first( str3, string("abcd") );
+ BOOST_CHECK( nc_result.begin()==nc_result.end() );
+ nc_result=find_first( str1, string("") );
+ BOOST_CHECK( nc_result.begin()==nc_result.end() );
+
+ cv_result=find_first( const_cast<const string&>(str3), string("abcd") );
+ BOOST_CHECK( cv_result.begin()==cv_result.end() );
+ cv_result=find_first( const_cast<const string&>(str1), string("") );
+ BOOST_CHECK( cv_result.begin()==cv_result.end() );
+
+ // iterator_range specific tests
+ ostringstream osstr;
+ osstr << find_first( str1, "abc" );
+ BOOST_CHECK( osstr.str()=="abc" );
+}
+
+
 boost::unit_test::test_suite* init_unit_test_suite( int argc, char* argv[] )
 {
 
     /*!\todo test with other allocators and comparators
     \todo test with different iterator types, depending on what each algorithm supports
     \todo test matches of empty substring in nonempty string (should return empty range but iterators with proper offset)
- \todo test the nonmatch of nonempty substring in empty string
+ \todo test the non-match of nonempty substring in empty string
     \todo test the unique match of empty substring in empty string
     \todo test more strings with consecutive substring matches
     */
@@ -614,5 +847,7 @@
     boost::unit_test::framework::master_test_suite().add(
         BOOST_TEST_CASE(logpow_test)
         );
+ boost::unit_test::framework::master_test_suite().add(
+ BOOST_TEST_CASE(find_test));
     return NULL;
 }


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk