Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r64851 - in sandbox/SOC/2010/stringalgos: boost/algorithm/string/finder/detail boost/algorithm/string/string_search libs/algorithm/string/benchmark libs/algorithm/string/doc
From: mstefanro_at_[hidden]
Date: 2010-08-16 14:00:26


Author: mstefanro
Date: 2010-08-16 14:00:23 EDT (Mon, 16 Aug 2010)
New Revision: 64851
URL: http://svn.boost.org/trac/boost/changeset/64851

Log:
[GSoC2010][StringAlgo] doc, kmp improvements
Text files modified:
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder/detail/finder_typedefs.hpp | 3
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/knuth_morris_pratt.hpp | 12 +-
   sandbox/SOC/2010/stringalgos/libs/algorithm/string/benchmark/string_search.cpp | 150 ++-------------------------------------
   sandbox/SOC/2010/stringalgos/libs/algorithm/string/doc/usage.xml | 64 +++++++++-------
   4 files changed, 51 insertions(+), 178 deletions(-)

Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder/detail/finder_typedefs.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder/detail/finder_typedefs.hpp (original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder/detail/finder_typedefs.hpp 2010-08-16 14:00:23 EDT (Mon, 16 Aug 2010)
@@ -68,9 +68,8 @@
         BOOST_ALGORITHM_DETAIL_FINDER_TYPEDEFS(Range1T, Range2T);
         BOOST_ALGORITHM_DETAIL_COMMON_FINDER_TYPEDEFS2(ComparatorT, AllocatorT);
     };
-*/
 
 } } }
-
+*/
 
 #endif // BOOST_ALGORITHM_FINDER_TYPEDEFS_HPP

Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/knuth_morris_pratt.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/knuth_morris_pratt.hpp (original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/knuth_morris_pratt.hpp 2010-08-16 14:00:23 EDT (Mon, 16 Aug 2010)
@@ -90,6 +90,7 @@
 
                 std::size_t str_size = boost::end(str) - boost::begin(str),
                     substr_size = boost::end(substr) - boost::begin(substr);
+
                 std::size_t str_idx = start - boost::begin(str), substr_idx = 0;
 
                 if (boost::begin(substr) == boost::end(substr))
@@ -151,15 +152,15 @@
                 
                 std::size_t substr_size = boost::end(substr) - boost::begin(substr);
                 failure_func.clear();
- failure_func.reserve(substr_size);
- failure_func.push_back(0); // failure_func[0] = 0
+ //failure_func.reserve(substr_size);
+ //failure_func.push_back(0); // failure_func[0] = 0
+ failure_func.resize(substr_size);
                 
                 if (substr_size < 2) return;
 
                 //std::size_t i = 0, j = 1;
                 substring_iterator_type i = boost::begin(substr), j = boost::begin(substr)+1;
 
- //while (j < substr_size)
                 do
                 {
                     //while (i > 0 && !comp_(*(boost::begin(substr)+i), *(boost::begin(substr)+j)))
@@ -173,13 +174,14 @@
                         !comp_(*i,*j))
                     {
                         //Invariant: i == 0 and substr[0] != substr[j], which means failure_func[j]=0
- failure_func.push_back(0);
+ //failure_func.push_back(0);
                         ++j;
                     }
                     //Invariant: j is out of bounds or P[i]==P[j], meaning failure_func[j]=i+1
                     /*if (j < substr_size)
                     {
- failure_func.push_back(i+1);
+ failure_func[j]=i+1;
+ //failure_func.push_back(i+1);
                         ++j; ++i;
                     }
                     else break;*/

Modified: sandbox/SOC/2010/stringalgos/libs/algorithm/string/benchmark/string_search.cpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/libs/algorithm/string/benchmark/string_search.cpp (original)
+++ sandbox/SOC/2010/stringalgos/libs/algorithm/string/benchmark/string_search.cpp 2010-08-16 14:00:23 EDT (Mon, 16 Aug 2010)
@@ -1,146 +1,11 @@
-/*
-#if defined(BOOST_MPL_LIMIT_METAFUNCTION_ARITY) && BOOST_MPL_LIMIT_METAFUNCTION_ARITY < 6
-#undef BOOST_MPL_LIMIT_METAFUNCTION_ARITY
-#define BOOST_MPL_LIMIT_METAFUNCTION_ARITY 10
-#elif !defined(BOOST_MPL_LIMIT_METAFUNCTION_ARITY)
-#define BOOST_MPL_LIMIT_METAFUNCTION_ARITY 10
-#endif
-*/
-
-
-#if 0
-
-#include <boost/type_traits.hpp>
-
-
-#include <boost/mpl/lambda.hpp>
-#include <boost/mpl/placeholders.hpp>
-#include <boost/mpl/transform.hpp>
-#include <boost/mpl/vector_c.hpp>
-#include <boost/mpl/equal.hpp>
-#include <boost/mpl/fold.hpp>
-#include <boost/mpl/vector.hpp>
-
-
-#include <iostream>
-#include <string>
-#include <vector>
-
-
-using namespace std;
-using namespace boost;
-
-/*
-template <class F, class X>
-struct twice : boost::mpl::apply<F, typename boost::mpl::apply<F, X>::type> {};
-
-using namespace boost::mpl;
-using namespace boost::mpl::placeholders;
-
-template <class S1, class S2>
-struct cross_product
-{
- template <class OldS, class T>
- struct inner_cross_product
- {
- typedef typename boost::mpl::fold<S2, OldS,
- boost::mpl::push_back<boost::mpl::_1, std::pair<T, boost::mpl::_2> > >::type type;
- };
-
- typedef typename boost::mpl::fold<S1, boost::mpl::vector0<>,
- inner_cross_product<boost::mpl::_1,boost::mpl::_2> >::type type;
-};
-
-int main ()
-{
- using namespace boost;
- using namespace boost::mpl::placeholders;
- typedef twice<boost::add_pointer<boost::mpl::_1>,int>::type A;
- static_assert(is_same<A, int**>::value,"a");
- static_assert(
- boost::mpl::equal<
- boost::mpl::transform<
- boost::mpl::vector_c<int, 1,2,3>,
- boost::mpl::plus<_,boost::mpl::int_<1> >
- >::type,
- boost::mpl::vector_c<int,2,3,4>
- >::value, "c");
- static_assert(
- boost::mpl::equal<
- boost::mpl::transform<
- boost::mpl::vector_c<int,1,2,3>,
- boost::mpl::vector_c<int,1,1,1>,
- boost::mpl::plus<_,_>
- >::type,
- boost::mpl::vector_c<int,2,3,4>
- >::value, "b");
- static_assert(
- boost::mpl::equal<
- boost::mpl::fold<
- boost::mpl::vector<int>, boost::mpl::vector0<>,
- boost::mpl::push_back<boost::mpl::_1,std::pair<boost::mpl::_2,boost::mpl::_2> > >::type,
- boost::mpl::vector<std::pair<int,int>>
- >::value, "c");
- static_assert(
- boost::mpl::equal<
- cross_product<
- boost::mpl::vector<char>, boost::mpl::vector<char>
- >::type,
- boost::mpl::vector<std::pair<char,char>>
- >::value, "ohai"
- );
-
-
- std::cin.get();
- return 0;
-}
-*/
-
-
-
-
-
-
-#include <boost/mpl/lambda.hpp>
-#include <boost/mpl/placeholders.hpp>
-#include <boost/mpl/transform.hpp>
-#include <boost/mpl/vector_c.hpp>
-#include <boost/mpl/equal.hpp>
-#include <boost/mpl/fold.hpp>
-#include <boost/mpl/vector.hpp>
-#include <boost/mpl/apply.hpp>
-#include <boost/fusion/adapted/mpl.hpp>
-
-
-template <class T, class U, class V> struct A : T::template X<U> {};
-
-#if 0
-template <class Range1T, class Range2T, class AlgorithmT/*,
- class ComparatorT = ::boost::algorithm::is_equal,*/
- //class AllocatorT = /*typename AlgorithmT::default_allocator_type*/std::allocator<std::size_t>/*,
- //class AdditionalBehaviorT = boost::algorithm::finder_no_additional_behavior*/
->
-class simplified_finder_t2 :
- //public boost::algorithm::detail::finder_typedefs<Range1T,Range2T,ComparatorT,AllocatorT>,
- private AlgorithmT::template algorithm<
- simplified_finder_t<Range1T, Range2T, AlgorithmT>,
- BOOST_STRING_TYPENAME boost::range_const_iterator<Range1T>::type,
- BOOST_STRING_TYPENAME boost::range_iterator<Range2T>::type//,
- //ComparatorT,AllocatorT>
- >
-{ };
-#endif
-
-#endif
-
-//#define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS
-//#define BOOST_MPL_LIMIT_METAFUNCTION_ARITY 10
 #include <boost/algorithm/string/benchmark_finder.hpp>
 
 #include <boost/algorithm/string/find.hpp>
 #include <boost/algorithm/string/finder.hpp>
 #include <boost/algorithm/string/string_search.hpp>
 
+//#include <boost/algorithm/string/string_search/knuth_morris_pratt2.hpp>
+
 #include <boost/mpl/lambda.hpp>
 #include <boost/fusion/algorithm/transformation/transform.hpp>
 #include <boost/fusion/container/vector/convert.hpp>
@@ -301,11 +166,11 @@
     boost::benchmark_finder_t<std::string, std::string,
         boost::mpl::vector<
             boost::naive_search,
- boost::knuth_morris_pratt//,
- //boost::boyer_moore,
- //boost::suffix_array_search,
- //boost::rabin_karp32//,
- //boost::rabin_karp64
+ boost::knuth_morris_pratt,
+ boost::boyer_moore,
+ boost::suffix_array_search,
+ boost::rabin_karp32,
+ boost::rabin_karp64
>,
        boost::is_equal> b;
 
@@ -318,6 +183,7 @@
 
     b.clear();
 
+ //Use your own files here to benchmark.
     std::string const &benchmark_path = "E:/gsoc-boost/benchmarks";
 
     std::vector<std::string> benchmark_files = list_of

Modified: sandbox/SOC/2010/stringalgos/libs/algorithm/string/doc/usage.xml
==============================================================================
--- sandbox/SOC/2010/stringalgos/libs/algorithm/string/doc/usage.xml (original)
+++ sandbox/SOC/2010/stringalgos/libs/algorithm/string/doc/usage.xml 2010-08-16 14:00:23 EDT (Mon, 16 Aug 2010)
@@ -351,47 +351,53 @@
     <title>Finders</title>
     <para>Finders offer a greater degree of control over what string search algorithms you would like to use for finding. Some of them may also be used as parameters to find/replace/split functions. </para>
     <para>They are especially useful if multiple searches are to be performed and the string or the substring do not change every iteration (because most string search algorithms precompute information on either the sought string or the text, meaning subsequent searches can be faster).</para>
- <para>There currently are 6 supported finder types: <classname alt="boost::algorithm::finder_t">finder_t</classname>, <classname alt="boost::algorithm::simplified_finder_t">simplified_finder_t</classname>, <classname alt="boost::algorithm::benchmark_finder_t">benchmark_finder_t</classname>, <classname alt="boost::algorithm::first_finder_t">first_finder_t</classname>, <classname alt="boost::algorithm::nth_finder_t">nth_finder_t</classname> and <classname alt="boost::algorithm::last_finder_t">last_finder_t</classname>.<programlisting>std::string random_saying =
- "Speak when you are angry and you will make the best speech you'll ever regret.";
+ <para>
+ There currently are 6 supported finder types: <classname alt="boost::algorithm::finder_t">finder_t</classname>, <classname alt="boost::algorithm::simplified_finder_t">simplified_finder_t</classname>, <classname alt="boost::algorithm::benchmark_finder_t">benchmark_finder_t</classname>, <classname alt="boost::algorithm::first_finder_t">first_finder_t</classname>, <classname alt="boost::algorithm::nth_finder_t">nth_finder_t</classname> and <classname alt="boost::algorithm::last_finder_t">last_finder_t</classname>.
+ </para>
+ <para><programlisting>std::string random_saying =
+ "Speak when you are angry and you will make the best speech you'll ever regret.", pattern;
+boost::iterator_range&lt;std::string::iterator&gt; match;
 
-//Create a finder_t object that will search using the Knuth-Morris-Pratt algorithm.
-//Rule of thumb for finder_t is:
-// pass a pointer and no copy is made, pass a reference and an internal copy is made.
-//Note that finder_t is the only finder to support passing references (other finders only support
-// passing pointers)
-//In this example, we pass a temporary as the pattern (which will be copied internally), and a pointer to a range
-//as the text (and no copy will be made).
 boost::finder_t&lt;std::string, std::string, boost::knuth_morris_pratt&gt; finder("angry", &amp;random_saying);
-
-//Look for the first match of the pattern in the text using our finder
-boost::iterator_range&lt;std::string::iterator&gt; &amp;match = finder.find_first();
-boost::to_upper(match); // turn the matching range into uppercase
+match = finder.find_first();
+boost::to_upper(match);
 std::cout &lt;&lt; random_saying &lt;&lt; std::endl; // Outputs "Speak when you are ANGRY..."
-
-//Create a functor finder (first_finder_t, last_finder_t, nth_finder_t are functor finders)
-//which looks for the last occurrence of pattern "you" in a string using the Boyer-Moore algorithm.
-std::string pattern = "you";
+</programlisting>
+In the above example, we start by constructing a <classname alt="boost::algorithm::finder_t">finder_t</classname> object that will search using the Knuth-Morris-Pratt algorithm.
+Rule of thumb for finders is: pass a pointer to a range and no copy is made, pass a reference to a range and an internal copy is made.
+Note that <classname alt="boost::algorithm::finder_t">finder_t</classname> is the only finder to support passing references (therefore, the only finder that supports copying passed data internally).
+In this example, we pass a temporary as the pattern (which will be copied internally), and a pointer to a range as the text (so no copy of the text will be made).
+The code above finds the first occurrence of the word "angry" in the string stored in random_saying and then turns it into uppercase.
+</para>
+<para>
+ <programlisting>
+pattern = "you";
 boost::last_finder_t&lt;std::string, boost::boyer_moore&gt; finder2(&amp;pattern);
-//Call the functor using the string in which to search
 match = finder2(random_saying.begin(), random_saying.end());
-//Turn the last occurrence of "you" in random_saying to uppercase
-boost::to_upper(match);
+boost::to_upper(match); //Turn the last occurrence of "you" in random_saying to uppercase
 std::cout &lt;&lt; random_saying &lt;&lt; std::endl; // Outputs "... YOU'll ever regret."
-
-//Create a simplified_finder_t. Very similar to finder_t, except it only accepts
-//pointers (no references, which means it never attempts to make copies).
-pattern = "Speak";
+ </programlisting>
+ Next, we create a functor finder (<classname alt="boost::algorithm::first_finder_t">first_finder_t</classname>, <classname alt="boost::algorithm::nth_finder_t">nth_finder_t</classname>, <classname alt="boost::algorithm::last_finder_t">last_finder_t</classname> are functor finders)
+which looks for the last ocurrence of the pattern "you" in a string using the Boyer-Moore algorithm and then turns it into uppercase.
+</para>
+<para>
+ <programlisting>pattern = "Speak";
 boost::simplified_finder_t&lt;std::string, std::string,
     boost::suffix_array_search&gt; finder3;
 finder3.set_string(&amp;random_saying);
 finder3.set_substring(&amp;pattern);
 match = finder3.find_first();
 boost::to_upper(match);
-//Important:
-//if we want to use finder3 at this point, we have to call set_string again,
-//because the string has changed (we turned some of it into uppercase and the finder has no idea we did that)
-finder3.set_string(&amp;random_saying);
 
-std::cout &lt;&lt; random_saying &lt;&lt; std::endl; // Outputs "SPEAK when you are ANGRY and you will make the best speech YOU'll ever regret."</programlisting>For more information on the features and functionality of various finder types, see <xref linkend="string_algo.quickref.finder_types"></xref>.</para>
+// Outputs "SPEAK when you are ANGRY and you will make the best speech YOU'll ever regret."
+std::cout &lt;&lt; random_saying &lt;&lt; std::endl;</programlisting>
+Finally, we create a <classname alt="boost::algorithm::simplified_finder_t">simplified_finder_t</classname> object. This behaves very similarly to <classname alt="boost::algorithm::finder_t">finder_t</classname>, except that
+it only allows pointer overloads (it never makes internal copies). Due to this fact, the user of the finder must guarantee that the lifetime of the pointees are at least as long as that of the finder.
+Note that after we have turned "Speak" into "SPEAK", our original string (<emphasis>random_saying</emphasis>) has changes, which means we have to call <emphasis>set_string()</emphasis> again.
+
+</para>
+
+<para>For more information on the features and functionality of various finder types, see <xref linkend="string_algo.quickref.finder_types"></xref>.</para>
+
   </section>
 </section>
\ No newline at end of file


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk