|
Boost-Commit : |
Subject: [Boost-commit] svn:boost r55559 - in sandbox: boost/mapreduce boost/mapreduce/intermediates libs/mapreduce/examples/wordcount
From: cdm.henderson_at_[hidden]
Date: 2009-08-12 18:13:33
Author: chenderson
Date: 2009-08-12 18:13:32 EDT (Wed, 12 Aug 2009)
New Revision: 55559
URL: http://svn.boost.org/trac/boost/changeset/55559
Log:
Custom comparator for intermediate keys
WordCount application optimisations
Text files modified:
sandbox/boost/mapreduce/intermediates/in_memory.hpp | 6 ++-
sandbox/boost/mapreduce/job.hpp | 7 ++-
sandbox/libs/mapreduce/examples/wordcount/wordcount.cpp | 79 +++++++++++++++++++++++++--------------
sandbox/libs/mapreduce/examples/wordcount/wordcount.vcproj | 2
4 files changed, 59 insertions(+), 35 deletions(-)
Modified: sandbox/boost/mapreduce/intermediates/in_memory.hpp
==============================================================================
--- sandbox/boost/mapreduce/intermediates/in_memory.hpp (original)
+++ sandbox/boost/mapreduce/intermediates/in_memory.hpp 2009-08-12 18:13:32 EDT (Wed, 12 Aug 2009)
@@ -44,7 +44,8 @@
template<
typename MapTask,
typename ReduceTask,
- typename PartitionFn=mapreduce::hash_partitioner>
+ typename PartitionFn=mapreduce::hash_partitioner,
+ typename KeyCompare=std::less<typename ReduceTask::key_type> >
class in_memory
{
private:
@@ -52,7 +53,8 @@
std::vector<
std::map<
typename ReduceTask::key_type,
- std::list<typename ReduceTask::value_type> > >
+ std::list<typename ReduceTask::value_type>,
+ KeyCompare > >
intermediates_t;
public:
Modified: sandbox/boost/mapreduce/job.hpp
==============================================================================
--- sandbox/boost/mapreduce/job.hpp (original)
+++ sandbox/boost/mapreduce/job.hpp 2009-08-12 18:13:32 EDT (Wed, 12 Aug 2009)
@@ -23,8 +23,8 @@
class map_task
{
public:
- typedef MapKey key_type;
- typedef MapValue value_type;
+ typedef MapKey key_type;
+ typedef MapValue value_type;
};
template<typename ReduceKey, typename ReduceValue>
@@ -69,7 +69,8 @@
}
// 'value' parameter is not a reference to const to enable streams to be passed
- map_task_runner &operator()(typename map_task_type::key_type const &key, typename map_task_type::value_type &value)
+ map_task_runner &operator()(typename map_task_type::key_type const &key,
+ typename map_task_type::value_type &value)
{
map_task_type::map(*this, key, value);
Modified: sandbox/libs/mapreduce/examples/wordcount/wordcount.cpp
==============================================================================
--- sandbox/libs/mapreduce/examples/wordcount/wordcount.cpp (original)
+++ sandbox/libs/mapreduce/examples/wordcount/wordcount.cpp 2009-08-12 18:13:32 EDT (Wed, 12 Aug 2009)
@@ -9,18 +9,20 @@
//
// For more information, see http://www.boost.org/libs/mapreduce/
//
-
+
+#define BOOST_DISABLE_ASSERTS
#if !defined(_DEBUG) && !defined(BOOST_DISABLE_ASSERTS)
# pragma message("Warning: BOOST_DISABLE_ASSERTS not defined")
#endif
#include <boost/config.hpp>
#if defined(BOOST_MSVC)
-# pragma warning(disable: 4244 4512 4267)
+# pragma warning(disable: 4244 4512 4267 4996)
#endif
#include <boost/mapreduce.hpp>
#include <numeric> // accumulate
+#include <boost/algorithm/string.hpp>
#if defined(BOOST_MSVC) && defined(_DEBUG)
#include <crtdbg.h>
@@ -29,11 +31,11 @@
namespace wordcount {
struct map_task : public boost::mapreduce::map_task<
- std::string, // MapKey
- std::pair<char const *, char const *> > // MapValue
+ std::string, // MapKey (filename)
+ std::pair<char const *, char const *> > // MapValue (memory mapped file contents)
{
template<typename Runtime>
- static void map(Runtime &runtime, std::string const &/*key*/, value_type &value)
+ static void map(Runtime &runtime, key_type const &/*key*/, value_type &value)
{
bool in_word = false;
char const *ptr = value.first;
@@ -46,41 +48,29 @@
{
if ((ch < 'A' || ch > 'Z') && ch != '\'')
{
- std::string w(word,ptr-word);
- std::transform(w.begin(), w.end(), w.begin(),
- std::bind1st(
- std::mem_fun(&std::ctype<char>::tolower),
- &std::use_facet<std::ctype<char> >(std::locale::classic())));
- runtime.emit_intermediate(w, 1);
+ runtime.emit_intermediate(std::make_pair(word,ptr-word), 1);
in_word = false;
}
}
- else
+ else if (ch >= 'A' && ch <= 'Z')
{
- if (ch >= 'A' && ch <= 'Z')
- {
- word = ptr;
- in_word = true;
- }
+ word = ptr;
+ in_word = true;
}
}
if (in_word)
- {
- BOOST_ASSERT(ptr-word > 0);
- std::string w(word,ptr-word);
- std::transform(w.begin(), w.end(), w.begin(),
- std::bind1st(
- std::mem_fun(&std::ctype<char>::tolower),
- &std::use_facet<std::ctype<char> >(std::locale::classic())));
- runtime.emit_intermediate(w, 1);
- }
+ runtime.emit_intermediate(std::make_pair(word,ptr-word), 1);
}
};
-struct reduce_task : public boost::mapreduce::reduce_task<std::string, unsigned>
+typedef std::pair<char const *, std::ptrdiff_t> reduce_key_t;
+
+struct reduce_task : public boost::mapreduce::reduce_task<
+ reduce_key_t,
+ unsigned>
{
template<typename Runtime, typename It>
- static void reduce(Runtime &runtime, std::string const &key, It it, It const ite)
+ static void reduce(Runtime &runtime, key_type const &key, It it, It const ite)
{
runtime.emit(key, std::accumulate(it, ite, 0));
}
@@ -130,6 +120,37 @@
} // namespace wordcount
+template<>
+bool std::less<wordcount::reduce_key_t>::operator()(wordcount::reduce_key_t const &first, wordcount::reduce_key_t const &second) const
+{
+ std::ptrdiff_t const len = std::min(first.second, second.second);
+ int const cmp = strnicmp(first.first, second.first, len);
+ if (cmp < 0)
+ return true;
+ else if (cmp > 0)
+ return false;
+
+ return (first.second < second.second);
+}
+
+template<>
+bool std::operator==(wordcount::reduce_key_t const &first, wordcount::reduce_key_t const &second)
+{
+ if (first.second != second.second)
+ return false;
+ else if (first.second == 0 && first.first == 0 && second.first == 0)
+ return true;
+
+ return (strnicmp(first.first, second.first, first.second) == 0);
+}
+
+template<>
+unsigned boost::mapreduce::hash_partitioner::operator()(wordcount::reduce_key_t const &key, unsigned partitions) const
+{
+ return boost::hash_range(key.first, key.first+key.second) % partitions;
+}
+
+
int main(int argc, char **argv)
{
@@ -215,7 +236,7 @@
frequencies.sort(boost::mapreduce::detail::greater_2nd<wordcount::job::keyvalue_t>);
std::cout << "\n\nMapReduce results:";
for (frequencies_t::const_iterator freq=frequencies.begin(); freq!=frequencies.end(); ++freq)
- std::cout << "\n" << freq->first << "\t" << freq->second;
+ printf("\n%.*s\t%d", freq->first.second, freq->first.first, freq->second);
}
}
catch (std::exception &e)
Modified: sandbox/libs/mapreduce/examples/wordcount/wordcount.vcproj
==============================================================================
--- sandbox/libs/mapreduce/examples/wordcount/wordcount.vcproj (original)
+++ sandbox/libs/mapreduce/examples/wordcount/wordcount.vcproj 2009-08-12 18:13:32 EDT (Wed, 12 Aug 2009)
@@ -121,7 +121,7 @@
Name="VCCLCompilerTool"
InlineFunctionExpansion="2"
AdditionalIncludeDirectories=""
- PreprocessorDefinitions="WIN32_LEAN_AND_MEAN;BOOST_LIB_DIAGNOSTIC"
+ PreprocessorDefinitions="WIN32_LEAN_AND_MEAN"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="4"
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk