Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r64032 - in sandbox/SOC/2010/stringalgos: boost/algorithm/string boost/algorithm/string/detail boost/algorithm/string/string_search libs/algorithm/string/example
From: mstefanro_at_[hidden]
Date: 2010-07-14 21:42:12


Author: mstefanro
Date: 2010-07-14 21:42:11 EDT (Wed, 14 Jul 2010)
New Revision: 64032
URL: http://svn.boost.org/trac/boost/changeset/64032

Log:
[GSoC2010][StringAlgo] Some fixes, an example.
Added:
   sandbox/SOC/2010/stringalgos/libs/algorithm/string/example/finder_example.cpp (contents, props changed)
Text files modified:
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/detail/finder.hpp | 2 +-
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder.hpp | 29 +++++++++++++++--------------
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/suffix_array.hpp | 7 +++++++
   3 files changed, 23 insertions(+), 15 deletions(-)

Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/detail/finder.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/detail/finder.hpp (original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/detail/finder.hpp 2010-07-14 21:42:11 EDT (Wed, 14 Jul 2010)
@@ -49,7 +49,7 @@
         typedef typename boost::range_const_iterator<substring_type>::type
             substring_iterator_type;
         //! The type of the string's iterator
- typedef typename boost::range_const_iterator<string_type>::type
+ typedef typename boost::range_iterator<string_type>::type
             string_iterator_type;
         //! The character type of the substring
         typedef typename boost::iterator_value<substring_iterator_type>::type

Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder.hpp (original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder.hpp 2010-07-14 21:42:11 EDT (Wed, 14 Jul 2010)
@@ -136,7 +136,7 @@
             typename boost::call_traits<allocator_type>::const_reference get_allocator() const
             { return allocator_; }
 
- private:
+ protected:
             substring_range_type substring_range_;
             string_range_type string_range_;
             bool substring_has_changed_, string_has_changed_;
@@ -174,7 +174,7 @@
                         Comparator,Allocator*/
                 typename finder_t<Sequence1T, Sequence2T, Algorithm, Comparator, Allocator, AdditionalBehavior>,
                 typename boost::range_const_iterator<Sequence1T>::type,
- typename boost::range_const_iterator<Sequence2T>::type,
+ typename boost::range_iterator<Sequence2T>::type,
                 Comparator, Allocator>,
             private AdditionalBehavior<
                 typename finder_t<Sequence1T, Sequence2T, Algorithm, Comparator, Allocator, AdditionalBehavior>,
@@ -198,7 +198,7 @@
             //! The type of the substring's iterator
             typedef typename boost::range_const_iterator<Sequence1T>::type substring_iterator_type;
             //! The type of the string's iterator
- typedef typename boost::range_const_iterator<Sequence2T>::type string_iterator_type;
+ typedef typename boost::range_iterator<Sequence2T>::type string_iterator_type;
             //! The character type of the substring
             typedef typename boost::iterator_value<substring_iterator_type>::type substring_char_type;
             //! The character type of the string
@@ -230,7 +230,7 @@
                 \note If an rvalue reference is passed as the string or substring, and your compiler supports rvalue
                     references, then a move is performed as opposed to a copy.
              */
- explicit finder_t (const Sequence1T *const substring = 0, const Sequence2T *const string = 0,
+ explicit finder_t (const Sequence1T *const substring = 0, Sequence2T *const string = 0,
                 Comparator comparator = Comparator(), Allocator allocator = Allocator())
                 : comparator_(comparator), allocator_(allocator),
                 substring_optional_copy_(), substring_range_(substring?*substring:substring_optional_copy_),
@@ -258,7 +258,7 @@
 
             //! \overload
             template <class Range1T>
- explicit finder_t (const Range1T &substring, const Sequence2T *const string = 0,
+ explicit finder_t (const Range1T &substring, Sequence2T *const string = 0,
                 Comparator comparator = Comparator(), Allocator allocator = Allocator(),
                 typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<Range1T,Sequence1T> >::type* = 0)
                 : comparator_(comparator), allocator_(allocator),
@@ -294,7 +294,7 @@
             template <class Range2T>
             explicit finder_t (
                 Sequence1T const &&substring,
- const Sequence2T *const string = 0,
+ Sequence2T *const string = 0,
                 Comparator comparator = Comparator(), Allocator allocator = Allocator())
                 : comparator_(comparator), allocator_(allocator),
                 substring_optional_copy_(std::move(substring)), string_optional_copy_(),
@@ -322,7 +322,7 @@
             //! \overload
             finder_t (
                 Sequence1T const &&substring,
- Sequence2T const &&string,
+ Sequence2T &&string,
                 Comparator comparator = Comparator(), Allocator allocator = Allocator())
                 : comparator_(comparator), allocator_(allocator),
                 substring_optional_copy_(std::move(substring)), string_optional_copy_(std::move(string)),
@@ -334,7 +334,7 @@
 
             //! \overload
             finder_t (const Sequence1T *const substring,
- Sequence2T const &&string,
+ Sequence2T &&string,
                 Comparator comparator = Comparator(), Allocator allocator = Allocator())
                 : comparator_(comparator), allocator_(allocator),
                 substring_optional_copy_(), string_optional_copy_(std::move(string)),
@@ -347,7 +347,7 @@
             //! \overload
             template <class Range1T>
             finder_t (const Range1T &substring,
- Sequence2T const &&string,
+ Sequence2T &&string,
                 Comparator comparator = Comparator(), Allocator allocator = Allocator(),
                 typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<Range1T,Sequence1T> >::type* = 0)
                 : comparator_(comparator), allocator_(allocator),
@@ -454,7 +454,7 @@
                 string_has_changed_ = true;
             }
             
- void set_string (Sequence2T const *const string = 0)
+ void set_string (Sequence2T *const string = 0)
             {
                 string_optional_copy_.clear();
                 if (string)
@@ -466,7 +466,7 @@
 
 # ifdef BOOST_HAS_RVALUE_REFS
             void set_string (
- Sequence2T const &&string)
+ Sequence2T &&string)
             {
                 string_optional_copy_ = std::move(string);
                 string_range_ = string_optional_copy_;
@@ -482,10 +482,11 @@
                     with the previous Finder concept
                 \todo This should probably only exist for classes that derive from finder_t (such as first_finder_t etc.)
              */
- template <class IteratorT>
- boost::iterator_range<IteratorT> operator()(IteratorT const &substring_start, IteratorT const &substring_end)
+ string_range_type operator()(string_iterator_type const &string_start,
+ string_iterator_type const &string_end)
             {
-
+ set_string( boost::make_iterator_range(string_start, string_end) );
+ return find_first();
             }
             
             //! Performs a search using the chosen algorithm.

Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/suffix_array.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/suffix_array.hpp (original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/suffix_array.hpp 2010-07-14 21:42:11 EDT (Wed, 14 Jul 2010)
@@ -13,6 +13,9 @@
     struct suffix_array_search
     {
         typedef std::allocator<std::size_t> default_allocator_type;
+
+ //! \todo This currently only works with boost::algorithm::is_equal as the comparator, because we don't yet have a template
+ //! parameter for a LessThanComparator. Maybe we should pass two comparators; give it some thought.
         template <class Finder,class RandomAccessIterator1T,
             class RandomAccessIterator2T,class Comparator,class Allocator>
         class algorithm
@@ -40,6 +43,8 @@
             {
                 substring_range_type const &substr = static_cast<Finder*>(this)->get_substring_range();
                 string_range_type const &str = static_cast<Finder*>(this)->get_string_range();
+ comparator_type const &comp = static_cast<Finder*>(this)->get_comparator();
+
                 std::size_t start_offset = start - boost::begin(str),
                     substr_size = boost::end(substr) - boost::begin(substr),
                     str_size = boost::end(str) - boost::begin(str);
@@ -57,6 +62,8 @@
                 if (lastsuffix_end > str_size) lastsuffix_end = str_size;
                 //the substring is smaller than the smallest lexicographic suffix, therefore no matches
                 //if (std::lexicographical_compare(substr.begin(), substr.end(),str.begin()+pos[0],str.begin()+firstsuffix_end))
+
+ //! \todo Is this really correct? Just because it starts before another suffix does not mean there are no matches.
                 if (suffix_less(substr, str, 0) ||
                     std::lexicographical_compare(str.begin()+pos_.back(),str.begin()+lastsuffix_end,substr.begin(),substr.end())
                     )

Added: sandbox/SOC/2010/stringalgos/libs/algorithm/string/example/finder_example.cpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2010/stringalgos/libs/algorithm/string/example/finder_example.cpp 2010-07-14 21:42:11 EDT (Wed, 14 Jul 2010)
@@ -0,0 +1,79 @@
+#include <string>
+#include <vector>
+#include <iostream>
+
+#include <boost/algorithm/string/string_search.hpp>
+#include <boost/algorithm/string/finder.hpp>
+#include <boost/algorithm/string/case_conv.hpp>
+
+#include <boost/range/algorithm/copy.hpp>
+
+int main ()
+{
+ // Demonstrates finder_t with two search algorithms: Knuth-Morris-Pratt and suffix arrays.
+//Example 1: searching with Knuth-Morris-Pratt (KMP)
+
+ typedef boost::finder_t<std::wstring, std::wstring, boost::knuth_morris_pratt> finder;
+ finder f;
+
+ f.set_string(L"The world is mine"); // set the text to search in: "The world is mine"
+ f.set_substring(L"mine"); // set the pattern to search for: "mine"
+ std::wstring::difference_type match = f.find_first_index(); // searches for pattern "mine" in text
+ // "The world is mine"
+ if (match != static_cast<std::string::difference_type>(-1)) // -1 is treated as "no match"
+ std::wcout << L"Found a match at position " << match << std::endl;
+
+ f.set_substring(L"world"); // Note: the string stays the same, only the substring was changed
+ match = f.find_first_index(); // searches for pattern "world" in text "The world is mine"
+ if (match != static_cast<std::string::difference_type>(-1)) // -1 is treated as "no match"
+ std::wcout << L"Found a match at position " << match << std::endl;
+
+ // Turn "Hello world" into "Hello WORLD"
+ std::wstring mystr(L"Hello world");
+ f.set_string(mystr); // NOTE(review): presumably the finder works on an internal copy here -- confirm
+ match = f.find_first_index(); // searches for pattern "world" in text "Hello world"
+ boost::to_upper( boost::make_iterator_range(mystr.begin()+match,mystr.begin()+match+5) ); // 5 == length of "world"; match is not checked here
+ std::wcout << mystr << std::endl;
+
+ // Turn "Hello WORLD" into "HELLO WORLD"
+ f.set_string(&mystr);
+ f.set_substring(L"Hello");
+ finder::string_range_type match2 = f.find_first(); // because we've passed the string as a
+ // pointer, no internal copy of the text
+ // was made, and we can use find_first()
+ // to get a range of the match
+ boost::to_upper(match2); // uppercases the matched range
+ std::wcout << mystr << std::endl;
+
+// Example 2: Searching with suffix arrays
+
+ typedef boost::finder_t<std::vector<wchar_t>, std::wstring, boost::suffix_array_search,
+ boost::is_equal> finder2;
+ //the pattern's type is a vector of wchar_t-s, whereas the text's type is a wstring
+ finder2 f2;
+ f2.set_string(
+ L"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer interdum elit ac orci "
+ L"fermentum in pretium lectus facilisis. Vestibulum rutrum convallis justo nec fringilla. "
+ L"Sed magna justo, sollicitudin sed viverra in, lacinia quis metus. Integer volutpat, nisl "
+ L"rhoncus condimentum accumsan, est mi fermentum lacus, sed imperdiet sem erat a risus. "
+ ); // Note: this works by making an internal copy (or move if the compiler supports it) of
+ // the given text
+ f2.set_substring(L"consectetur");
+
+ boost::to_upper(f2.find_first()); // finds "consectetur" in the internal copy
+ // then makes it uppercase
+ //turns all occurrences of the letter e into uppercase
+ f2.set_substring(L"e");
+ for (finder2::string_range_type range = f2.find_first();
+ boost::begin(range) != boost::end(range); range = f2.find_next()) // stop on an empty range
+ {
+ boost::to_upper(range);
+ }
+
+ //display the internal copy of the text (NOTE(review): std::ostream_iterator lives in <iterator>, which is not included directly)
+ boost::copy(f2.get_string_range(), std::ostream_iterator<wchar_t,wchar_t>(std::wcout));
+ std::wcout << std::endl;
+
+ std::cin.get(); // wait for input on stdin before exiting
+ return 0;
+}


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk