Boost logo

Boost :

From: David Manura (dm.list_at_[hidden])
Date: 2006-07-20 00:08:32


Hello,

I've been testing the performance of the Boost string algorithms
library using the below program "boost_string_benchmark.cpp" as a
simple benchmark. Here are the results I get using the CVS copy of
Boost for two compilers and four methods of coding a left trim:

   MSVC++2005: 1: 4171 2: 3235 3: 3312 4: 4860 (run times)
   G++/Intel: 1: 1657 2: 1156 3: 1172 4: 2953

Now, I was getting order-of-magnitude lower run times with direct C
coding of trim_left, so I made some minor changes to the Boost source
in hope of improving this (below diff.txt). Here are the results
after the changes:

   MSVC++2005: 1: 2406 2: 2359 3: 1500 4: 297 (run times)
   G++/Intel 1: 703 2: 703 3: 188 4: 172

It's a good improvement. In particular, the results using method #4
are much more like the order-of-magnitude improvement I saw with
direct C coding. Therefore, such results are certainly achievable
with Boost after a reasonable patch to Boost and coding in a more
awkward way (method #4). It would be nice if I could obtain the same
speedup using more typical coding methods (method #1).

I bring this up for discussions since others here are much more
familiar with the Boost internals than I am. Is is not reasonable to
expect Boost string processing performance to be almost as fast as
direct C coding? I only looked into this some, but it seemed that
repeated temporary object construction was to blame for much of the
slowdown, and the patch only partly addresses that.

BTW, Boost is a really great project, and I appreciate the work you
do. I have some other suggestions on Boost relating to flex_string
support that I'll leave for a future discussion.

--davidm

<snip>
// boost_string_benchmark.cpp
// benchmarks for Boost string algorithms
// David Manura, 20060719

// VC++2005: cl /EHsc /D_SCL_SECURE_NO_DEPRECATE /D_CRT_SECURE_NO_DEPRECATE /O2 /DNDEBUG /MT /D_MBCS boost_string_benchmark.cpp
// GCC: g++ -O2 -DNDEBUG boost_string_benchmark.cpp

#include <boost/algorithm/string.hpp>
#include <ctime>
#include <iostream>

using namespace boost;
using namespace std;

const int num_loops = 3000000;

struct cstring {
     typedef const char * const_iterator;
     typedef char * iterator;

     char * & b_;
     char * e_;

     iterator begin() { return b_; }
     iterator end() { return e_; }
     cstring(char * s) : b_(s), e_(s + strlen(s)) { }
     char * erase(char * p1, char * p2) {
         // for testing, we may comment this implementation out.
         //memmove(b_, p2, e_-p2+1);
         //e_ -= (p2 - p1);
         return p1;
     }
};

template<class T1, class T2, class T3, class T4>
void compare()
{
     for (int n=0; n<3; n++) {
         clock_t t0 = clock();
         T1::run();
         clock_t t1 = clock();
         T2::run();
         clock_t t2 = clock();
         T3::run();
         clock_t t3 = clock();
         T4::run();
         clock_t t4 = clock();
         cout
           << " 1: " << (t1 - t0)
           << " 2: " << (t2 - t1)
           << " 3: " << (t3 - t2)
           << " 4: " << (t4 - t3)
           << endl;
    }
}

template <class String>
struct test1_string1 {
     static void run() {
         for (int n=0; n<num_loops; n++) {
              char c[] = " 12345678901234567890 ";
              String s(c);
              trim_left(s);
         }
     }
};

template <class String>
struct test1_string2 {
     static void run() {
         const std::locale loc;
         for (int n=0; n<num_loops; n++) {
              char c[] = " 12345678901234567890 ";
              String s(c);
              trim_left(s, loc);
         }
     }
};

template <class String>
struct test1_string3 {
     static void run() {
         const std::locale loc;
         const boost::algorithm::detail::is_classifiedF isspace2 = is_space(loc);
         for (int n=0; n<num_loops; n++) {
              char c[] = " 12345678901234567890 ";
              String s(c);
              trim_left_if(s, isspace2);
         }
     }
};

template <class String>
struct test1_string4 {
     static void run() {
         const boost::algorithm::detail::is_any_ofF<char> isspace2 = is_any_of(" ");
         for (int n=0; n<num_loops; n++) {
              char c[] = " 12345678901234567890 ";
              String s(c);
              trim_left_if(s, isspace2);
         }
     }
};

template <class String>
struct tests1 {
     static void run() {
         compare<test1_string1<String>,
                 test1_string2<String>,
                 test1_string3<String>,
                 test1_string4<String>
>();
     }
};

int main()
{
     tests1<cstring>::run();

     //tests1<std::string>::run();

     return 0;
}
</snip>

<snip>
diff -udr boost/boost/algorithm/string/detail/trim.hpp boost-patched/boost/algorithm/string/detail/trim.hpp
--- boost/boost/algorithm/string/detail/trim.hpp 2004-03-04 17:11:41.000000000 -0500
+++ boost-patched/boost/algorithm/string/detail/trim.hpp 2006-07-19 21:12:59.703125000 -0400
@@ -12,11 +12,14 @@

  #include <boost/algorithm/string/config.hpp>
  #include <boost/detail/iterator.hpp>
+#include <locale>

  namespace boost {
      namespace algorithm {
          namespace detail {

+ static const std::locale default_locale_;
+
  // trim iterator helper -----------------------------------------------//

              // Search for first non matching character from the beginning of the sequence
@@ -24,7 +27,7 @@
              inline ForwardIteratorT trim_begin(
                  ForwardIteratorT InBegin,
                  ForwardIteratorT InEnd,
- PredicateT IsSpace )
+ const PredicateT & IsSpace )
              {
                  ForwardIteratorT It=InBegin;
                  for(; It!=InEnd; ++It )
@@ -41,7 +44,7 @@
              inline ForwardIteratorT trim_end(
                  ForwardIteratorT InBegin,
                  ForwardIteratorT InEnd,
- PredicateT IsSpace )
+ const PredicateT & IsSpace )
              {
                  typedef BOOST_STRING_TYPENAME boost::detail::
                      iterator_traits<ForwardIteratorT>::iterator_category category;
@@ -53,7 +56,7 @@
              inline ForwardIteratorT trim_end_iter_select(
                  ForwardIteratorT InBegin,
                  ForwardIteratorT InEnd,
- PredicateT IsSpace,
+ const PredicateT & IsSpace,
                  std::forward_iterator_tag )
              {
                  ForwardIteratorT TrimIt=InBegin;
@@ -74,7 +77,7 @@
              inline ForwardIteratorT trim_end_iter_select(
                  ForwardIteratorT InBegin,
                  ForwardIteratorT InEnd,
- PredicateT IsSpace,
+ const PredicateT & IsSpace,
                  std::bidirectional_iterator_tag )
              {
                  for( ForwardIteratorT It=InEnd; It!=InBegin; )
Only in boost-patched/boost/algorithm/string/detail: trim.hpp~
Only in boost-patched/boost/algorithm/string/detail: util.hpp~
diff -udr boost/boost/algorithm/string/trim.hpp boost-patched/boost/algorithm/string/trim.hpp
--- boost/boost/algorithm/string/trim.hpp 2005-06-06 16:03:18.000000000 -0400
+++ boost-patched/boost/algorithm/string/trim.hpp 2006-07-19 21:41:07.515625000 -0400
@@ -58,7 +58,7 @@
          inline OutputIteratorT trim_left_copy_if(
              OutputIteratorT Output,
              const RangeT& Input,
- PredicateT IsSpace)
+ const PredicateT& IsSpace)
          {
              std::copy(
                  ::boost::algorithm::detail::trim_begin(
@@ -76,7 +76,7 @@
              \overload
          */
          template<typename SequenceT, typename PredicateT>
- inline SequenceT trim_left_copy_if(const SequenceT& Input, PredicateT IsSpace)
+ inline SequenceT trim_left_copy_if(const SequenceT& Input, const PredicateT& IsSpace)
          {
              return SequenceT(
                  ::boost::algorithm::detail::trim_begin(
@@ -98,7 +98,7 @@
              \note This function provides the strong exception-safety guarantee
          */
          template<typename SequenceT>
- inline SequenceT trim_left_copy(const SequenceT& Input, const std::locale& Loc=std::locale())
+ inline SequenceT trim_left_copy(const SequenceT& Input, const std::locale& Loc=detail::default_locale_)
          {
              return
                  trim_left_copy_if(
@@ -116,7 +116,7 @@
              \param IsSpace An unary predicate identifying spaces
          */
          template<typename SequenceT, typename PredicateT>
- inline void trim_left_if(SequenceT& Input, PredicateT IsSpace)
+ inline void trim_left_if(SequenceT& Input, const PredicateT& IsSpace)
          {
              Input.erase(
                  begin(Input),
@@ -135,7 +135,7 @@
              \param Loc A locale used for 'space' classification
          */
          template<typename SequenceT>
- inline void trim_left(SequenceT& Input, const std::locale& Loc=std::locale())
+ inline void trim_left(SequenceT& Input, const std::locale& Loc=detail::default_locale_)
          {
              trim_left_if(
                  Input,
@@ -164,7 +164,7 @@
          inline OutputIteratorT trim_right_copy_if(
              OutputIteratorT Output,
              const RangeT& Input,
- PredicateT IsSpace )
+ const PredicateT& IsSpace )
          {
              std::copy(
                  begin(Input),
@@ -182,7 +182,7 @@
              \overload
           */
          template<typename SequenceT, typename PredicateT>
- inline SequenceT trim_right_copy_if(const SequenceT& Input, PredicateT IsSpace)
+ inline SequenceT trim_right_copy_if(const SequenceT& Input, const PredicateT& IsSpace)
          {
              return SequenceT(
                  begin(Input),
@@ -205,7 +205,7 @@
              \note This function provides the strong exception-safety guarantee
          */
          template<typename SequenceT>
- inline SequenceT trim_right_copy(const SequenceT& Input, const std::locale& Loc=std::locale())
+ inline SequenceT trim_right_copy(const SequenceT& Input, const std::locale& Loc=detail::default_locale_)
          {
              return
                  trim_right_copy_if(
@@ -224,7 +224,7 @@
              \param IsSpace An unary predicate identifying spaces
          */
          template<typename SequenceT, typename PredicateT>
- inline void trim_right_if(SequenceT& Input, PredicateT IsSpace)
+ inline void trim_right_if(SequenceT& Input, const PredicateT& IsSpace)
          {
              Input.erase(
                  ::boost::algorithm::detail::trim_end(
@@ -245,7 +245,7 @@
              \param Loc A locale used for 'space' classification
          */
          template<typename SequenceT>
- inline void trim_right(SequenceT& Input, const std::locale& Loc=std::locale())
+ inline void trim_right(SequenceT& Input, const std::locale& Loc=detail::default_locale_)
          {
              trim_right_if(
                  Input,
@@ -274,7 +274,7 @@
          inline OutputIteratorT trim_copy_if(
              OutputIteratorT Output,
              const RangeT& Input,
- PredicateT IsSpace)
+ const PredicateT& IsSpace)
          {
              BOOST_STRING_TYPENAME
                  range_const_iterator<RangeT>::type TrimEnd=
@@ -298,7 +298,7 @@
              \overload
           */
          template<typename SequenceT, typename PredicateT>
- inline SequenceT trim_copy_if(const SequenceT& Input, PredicateT IsSpace)
+ inline SequenceT trim_copy_if(const SequenceT& Input, const PredicateT& IsSpace)
          {
              BOOST_STRING_TYPENAME
                  range_const_iterator<SequenceT>::type TrimEnd=
@@ -328,7 +328,7 @@
              \note This function provides the strong exception-safety guarantee
          */
          template<typename SequenceT>
- inline SequenceT trim_copy( const SequenceT& Input, const std::locale& Loc=std::locale() )
+ inline SequenceT trim_copy( const SequenceT& Input, const std::locale& Loc=detail::default_locale_)
          {
              return
                  trim_copy_if(
@@ -346,7 +346,7 @@
              \param IsSpace An unary predicate identifying spaces
          */
          template<typename SequenceT, typename PredicateT>
- inline void trim_if(SequenceT& Input, PredicateT IsSpace)
+ inline void trim_if(SequenceT& Input, const PredicateT& IsSpace)
          {
              trim_right_if( Input, IsSpace );
              trim_left_if( Input, IsSpace );
@@ -361,7 +361,7 @@
              \param Loc A locale used for 'space' classification
          */
          template<typename SequenceT>
- inline void trim(SequenceT& Input, const std::locale& Loc=std::locale())
+ inline void trim(SequenceT& Input, const std::locale& Loc=detail::default_locale_)
          {
              trim_if(
                  Input,

</snip>


Boost list run by bdawes at acm.org, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk