Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r55559 - in sandbox: boost/mapreduce boost/mapreduce/intermediates libs/mapreduce/examples/wordcount
From: cdm.henderson_at_[hidden]
Date: 2009-08-12 18:13:33


Author: chenderson
Date: 2009-08-12 18:13:32 EDT (Wed, 12 Aug 2009)
New Revision: 55559
URL: http://svn.boost.org/trac/boost/changeset/55559

Log:
Custom comparator for intermediate keys
WordCount application optimisations
Text files modified:
   sandbox/boost/mapreduce/intermediates/in_memory.hpp | 6 ++-
   sandbox/boost/mapreduce/job.hpp | 7 ++-
   sandbox/libs/mapreduce/examples/wordcount/wordcount.cpp | 79 +++++++++++++++++++++++++--------------
   sandbox/libs/mapreduce/examples/wordcount/wordcount.vcproj | 2
   4 files changed, 59 insertions(+), 35 deletions(-)

Modified: sandbox/boost/mapreduce/intermediates/in_memory.hpp
==============================================================================
--- sandbox/boost/mapreduce/intermediates/in_memory.hpp (original)
+++ sandbox/boost/mapreduce/intermediates/in_memory.hpp 2009-08-12 18:13:32 EDT (Wed, 12 Aug 2009)
@@ -44,7 +44,8 @@
 template<
     typename MapTask,
     typename ReduceTask,
- typename PartitionFn=mapreduce::hash_partitioner>
+ typename PartitionFn=mapreduce::hash_partitioner,
+ typename KeyCompare=std::less<typename ReduceTask::key_type> >
 class in_memory
 {
   private:
@@ -52,7 +53,8 @@
     std::vector<
         std::map<
             typename ReduceTask::key_type,
- std::list<typename ReduceTask::value_type> > >
+ std::list<typename ReduceTask::value_type>,
+ KeyCompare > >
     intermediates_t;
 
   public:

Modified: sandbox/boost/mapreduce/job.hpp
==============================================================================
--- sandbox/boost/mapreduce/job.hpp (original)
+++ sandbox/boost/mapreduce/job.hpp 2009-08-12 18:13:32 EDT (Wed, 12 Aug 2009)
@@ -23,8 +23,8 @@
 class map_task
 {
   public:
- typedef MapKey key_type;
- typedef MapValue value_type;
+ typedef MapKey key_type;
+ typedef MapValue value_type;
 };
 
 template<typename ReduceKey, typename ReduceValue>
@@ -69,7 +69,8 @@
         }
 
         // 'value' parameter is not a reference to const to enable streams to be passed
- map_task_runner &operator()(typename map_task_type::key_type const &key, typename map_task_type::value_type &value)
+ map_task_runner &operator()(typename map_task_type::key_type const &key,
+ typename map_task_type::value_type &value)
         {
             map_task_type::map(*this, key, value);
 

Modified: sandbox/libs/mapreduce/examples/wordcount/wordcount.cpp
==============================================================================
--- sandbox/libs/mapreduce/examples/wordcount/wordcount.cpp (original)
+++ sandbox/libs/mapreduce/examples/wordcount/wordcount.cpp 2009-08-12 18:13:32 EDT (Wed, 12 Aug 2009)
@@ -9,18 +9,20 @@
 //
 // For more information, see http://www.boost.org/libs/mapreduce/
 //
-
+
+#define BOOST_DISABLE_ASSERTS
 #if !defined(_DEBUG) && !defined(BOOST_DISABLE_ASSERTS)
 # pragma message("Warning: BOOST_DISABLE_ASSERTS not defined")
 #endif
 
 #include <boost/config.hpp>
 #if defined(BOOST_MSVC)
-# pragma warning(disable: 4244 4512 4267)
+# pragma warning(disable: 4244 4512 4267 4996)
 #endif
 
 #include <boost/mapreduce.hpp>
 #include <numeric> // accumulate
+#include <boost/algorithm/string.hpp>
 
 #if defined(BOOST_MSVC) && defined(_DEBUG)
 #include <crtdbg.h>
@@ -29,11 +31,11 @@
 namespace wordcount {
 
 struct map_task : public boost::mapreduce::map_task<
- std::string, // MapKey
- std::pair<char const *, char const *> > // MapValue
+ std::string, // MapKey (filename)
+ std::pair<char const *, char const *> > // MapValue (memory mapped file contents)
 {
     template<typename Runtime>
- static void map(Runtime &runtime, std::string const &/*key*/, value_type &value)
+ static void map(Runtime &runtime, key_type const &/*key*/, value_type &value)
     {
         bool in_word = false;
         char const *ptr = value.first;
@@ -46,41 +48,29 @@
             {
                 if ((ch < 'A' || ch > 'Z') && ch != '\'')
                 {
- std::string w(word,ptr-word);
- std::transform(w.begin(), w.end(), w.begin(),
- std::bind1st(
- std::mem_fun(&std::ctype<char>::tolower),
- &std::use_facet<std::ctype<char> >(std::locale::classic())));
- runtime.emit_intermediate(w, 1);
+ runtime.emit_intermediate(std::make_pair(word,ptr-word), 1);
                     in_word = false;
                 }
             }
- else
+ else if (ch >= 'A' && ch <= 'Z')
             {
- if (ch >= 'A' && ch <= 'Z')
- {
- word = ptr;
- in_word = true;
- }
+ word = ptr;
+ in_word = true;
             }
         }
         if (in_word)
- {
- BOOST_ASSERT(ptr-word > 0);
- std::string w(word,ptr-word);
- std::transform(w.begin(), w.end(), w.begin(),
- std::bind1st(
- std::mem_fun(&std::ctype<char>::tolower),
- &std::use_facet<std::ctype<char> >(std::locale::classic())));
- runtime.emit_intermediate(w, 1);
- }
+ runtime.emit_intermediate(std::make_pair(word,ptr-word), 1);
     }
 };
 
-struct reduce_task : public boost::mapreduce::reduce_task<std::string, unsigned>
+typedef std::pair<char const *, std::ptrdiff_t> reduce_key_t;
+
+struct reduce_task : public boost::mapreduce::reduce_task<
+ reduce_key_t,
+ unsigned>
 {
     template<typename Runtime, typename It>
- static void reduce(Runtime &runtime, std::string const &key, It it, It const ite)
+ static void reduce(Runtime &runtime, key_type const &key, It it, It const ite)
     {
         runtime.emit(key, std::accumulate(it, ite, 0));
     }
@@ -130,6 +120,37 @@
 } // namespace wordcount
 
 
+template<>
+bool std::less<wordcount::reduce_key_t>::operator()(wordcount::reduce_key_t const &first, wordcount::reduce_key_t const &second) const
+{
+ std::ptrdiff_t const len = std::min(first.second, second.second);
+ int const cmp = strnicmp(first.first, second.first, len);
+ if (cmp < 0)
+ return true;
+ else if (cmp > 0)
+ return false;
+
+ return (first.second < second.second);
+}
+
+template<>
+bool std::operator==(wordcount::reduce_key_t const &first, wordcount::reduce_key_t const &second)
+{
+ if (first.second != second.second)
+ return false;
+ else if (first.second == 0 && first.first == 0 && second.first == 0)
+ return true;
+
+ return (strnicmp(first.first, second.first, first.second) == 0);
+}
+
+template<>
+unsigned boost::mapreduce::hash_partitioner::operator()(wordcount::reduce_key_t const &key, unsigned partitions) const
+{
+ return boost::hash_range(key.first, key.first+key.second) % partitions;
+}
+
+
 
 int main(int argc, char **argv)
 {
@@ -215,7 +236,7 @@
             frequencies.sort(boost::mapreduce::detail::greater_2nd<wordcount::job::keyvalue_t>);
             std::cout << "\n\nMapReduce results:";
             for (frequencies_t::const_iterator freq=frequencies.begin(); freq!=frequencies.end(); ++freq)
- std::cout << "\n" << freq->first << "\t" << freq->second;
+ printf("\n%.*s\t%d", freq->first.second, freq->first.first, freq->second);
         }
     }
     catch (std::exception &e)

Modified: sandbox/libs/mapreduce/examples/wordcount/wordcount.vcproj
==============================================================================
--- sandbox/libs/mapreduce/examples/wordcount/wordcount.vcproj (original)
+++ sandbox/libs/mapreduce/examples/wordcount/wordcount.vcproj 2009-08-12 18:13:32 EDT (Wed, 12 Aug 2009)
@@ -121,7 +121,7 @@
                                 Name="VCCLCompilerTool"
                                 InlineFunctionExpansion="2"
                                 AdditionalIncludeDirectories=""
- PreprocessorDefinitions="WIN32_LEAN_AND_MEAN;BOOST_LIB_DIAGNOSTIC"
+ PreprocessorDefinitions="WIN32_LEAN_AND_MEAN"
                                 RuntimeLibrary="2"
                                 UsePrecompiledHeader="0"
                                 WarningLevel="4"


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk