Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r61720 - in sandbox/hash: boost/hash boost/hash/detail libs/hash/example
From: me22.ca+boost_at_[hidden]
Date: 2010-04-30 23:23:27


Author: smcmurray
Date: 2010-04-30 23:23:25 EDT (Fri, 30 Apr 2010)
New Revision: 61720
URL: http://svn.boost.org/trac/boost/changeset/61720

Log:
hash: optimize the stream preprocessor by adding an update_n() that accepts more than one value at a time and bypasses the value staging where possible; add an example, hashsum.cpp, that calculates digests of files in the manner of coreutils' md5sum, sha1sum, etc.
Added:
   sandbox/hash/libs/hash/example/hashsum.cpp (contents, props changed)
Text files modified:
   sandbox/hash/boost/hash/compute_digest.hpp | 15 +++++
   sandbox/hash/boost/hash/detail/exploder.hpp | 25 ---------
   sandbox/hash/boost/hash/detail/imploder.hpp | 25 ---------
   sandbox/hash/boost/hash/digest.hpp | 17 ++++++
   sandbox/hash/boost/hash/pack.hpp | 103 ++++++++++++++++++++++++++++++++++-----
   sandbox/hash/boost/hash/stream_preprocessor.hpp | 31 +++++++++++
   6 files changed, 149 insertions(+), 67 deletions(-)

Modified: sandbox/hash/boost/hash/compute_digest.hpp
==============================================================================
--- sandbox/hash/boost/hash/compute_digest.hpp (original)
+++ sandbox/hash/boost/hash/compute_digest.hpp 2010-04-30 23:23:25 EDT (Fri, 30 Apr 2010)
@@ -9,6 +9,8 @@
 #ifndef BOOST_HASH_COMPUTE_DIGEST_HPP
 #define BOOST_HASH_COMPUTE_DIGEST_HPP
 
+#include <boost/static_assert.hpp>
+
 #include <iterator>
 #include <limits>
 
@@ -21,6 +23,7 @@
 typename hash_T::digest_type
 compute_digest(iter_T b, iter_T e) {
     typedef typename std::iterator_traits<iter_T>::value_type value_type;
+ BOOST_STATIC_ASSERT(std::numeric_limits<value_type>::is_specialized);
     unsigned const value_bits =
         std::numeric_limits<value_type>::digits +
         std::numeric_limits<value_type>::is_signed;
@@ -33,8 +36,16 @@
 
 template <typename hash_T, typename value_T>
 typename hash_T::digest_type
-compute_digest(value_T *p, size_t n) {
- return compute_digest<hash_T>(p, p+n);
+compute_digest(value_T const *p, size_t n) {
+ BOOST_STATIC_ASSERT(std::numeric_limits<value_T>::is_specialized);
+ unsigned const value_bits =
+ std::numeric_limits<value_T>::digits +
+ std::numeric_limits<value_T>::is_signed;
+ typedef typename hash_T::template stream_hash<value_bits>::type
+ stream_hash_type;
+ stream_hash_type sh;
+ sh.update_n(p, n);
+ return sh.end_message();
 }
 
 template <typename hash_T>

Modified: sandbox/hash/boost/hash/detail/exploder.hpp
==============================================================================
--- sandbox/hash/boost/hash/detail/exploder.hpp (original)
+++ sandbox/hash/boost/hash/detail/exploder.hpp 2010-04-30 23:23:25 EDT (Fri, 30 Apr 2010)
@@ -148,31 +148,6 @@
     static void explode1_array(OutputType &, unsigned &, IntputValue) {}
 };
 
-template <int UnitBits, int InputBits, int OutputBits,
- int k>
-struct exploder<stream_endian::host_unit<UnitBits>,
- InputBits, OutputBits, k> {
- BOOST_STATIC_ASSERT(!(InputBits % UnitBits) &&
- !(OutputBits % UnitBits));
- template <typename OutputValue, typename InputValue>
- static void step(OutputValue &z, InputValue x) {
- std::memcpy(&z, (char*)&x + k/CHAR_BIT, OutputBits/CHAR_BIT);
- }
- template <typename OutputType, typename InputValue>
- static void explode1_array(OutputType &out, unsigned &i, InputValue x) {
- step(out[i++], x);
- exploder<stream_endian::host_unit<UnitBits>,
- InputBits, OutputBits, k+OutputBits>
- ::explode1_array(out, i, x);
- }
-};
-template <int UnitBits, int InputBits, int OutputBits>
-struct exploder<stream_endian::host_unit<UnitBits>,
- InputBits, OutputBits, InputBits> {
- template <typename OutputType, typename IntputValue>
- static void explode1_array(OutputType &, unsigned &, IntputValue) {}
-};
-
 } // namespace detail
 } // namespace hash
 } // namespace boost

Modified: sandbox/hash/boost/hash/detail/imploder.hpp
==============================================================================
--- sandbox/hash/boost/hash/detail/imploder.hpp (original)
+++ sandbox/hash/boost/hash/detail/imploder.hpp 2010-04-30 23:23:25 EDT (Fri, 30 Apr 2010)
@@ -148,31 +148,6 @@
     static void implode1_array(InputType const &, unsigned &, OutputValue &) {}
 };
 
-template <int UnitBits, int InputBits, int OutputBits,
- int k>
-struct imploder<stream_endian::host_unit<UnitBits>,
- InputBits, OutputBits, k> {
- BOOST_STATIC_ASSERT(!(InputBits % UnitBits) &&
- !(OutputBits % UnitBits));
- template <typename InputValue, typename OutputValue>
- static void step(InputValue z, OutputValue &x) {
- std::memcpy((char*)&x + k/CHAR_BIT, &z, InputBits/CHAR_BIT);
- }
- template <typename InputType, typename OutputValue>
- static void implode1_array(InputType const &in, unsigned &i, OutputValue &x) {
- step(in[i++], x);
- imploder<stream_endian::host_unit<UnitBits>,
- InputBits, OutputBits, k+InputBits>
- ::implode1_array(in, i, x);
- }
-};
-template <int UnitBits, int InputBits, int OutputBits>
-struct imploder<stream_endian::host_unit<UnitBits>,
- InputBits, OutputBits, OutputBits> {
- template <typename InputType, typename OutputValue>
- static void implode1_array(InputType const &, unsigned &, OutputValue &) {}
-};
-
 } // namespace detail
 } // namespace hash
 } // namespace boost

Modified: sandbox/hash/boost/hash/digest.hpp
==============================================================================
--- sandbox/hash/boost/hash/digest.hpp (original)
+++ sandbox/hash/boost/hash/digest.hpp 2010-04-30 23:23:25 EDT (Fri, 30 Apr 2010)
@@ -14,6 +14,10 @@
 #include <boost/hash/pack.hpp>
 #include <boost/static_assert.hpp>
 
+#include <iterator>
+#include <ostream>
+#include <string>
+
 #include <cstring>
 
 namespace boost {
@@ -52,6 +56,12 @@
         return it;
     }
 
+ std::string
+ str() const {
+ cstring_type cstr = cstring();
+ return std::string(cstr.data(), cstr.size()-1);
+ }
+
     cstring_type
     cstring() const {
         cstring_type s;
@@ -126,6 +136,13 @@
     return a == b;
 }
 
+template <unsigned DB>
+std::ostream &
+operator<<(std::ostream &sink, digest<DB> const &d) {
+ d.ascii(std::ostream_iterator<char>(sink));
+ return sink;
+};
+
 } // namespace hash
 } // namespace boost
 

Modified: sandbox/hash/boost/hash/pack.hpp
==============================================================================
--- sandbox/hash/boost/hash/pack.hpp (original)
+++ sandbox/hash/boost/hash/pack.hpp 2010-04-30 23:23:25 EDT (Fri, 30 Apr 2010)
@@ -34,15 +34,25 @@
                    Bits, Bits,
                    false, false> {
 
- template <typename OutputType, typename InputType>
- static OutputType pack_array(InputType const &in) {
+ template <typename InputType, typename OutputType>
+ static void pack_array(InputType const &in, OutputType &out) {
         BOOST_STATIC_ASSERT(OutputType::static_size == InputType::static_size);
- OutputType out;
         unsigned i = 0;
         for (unsigned j = 0; j < InputType::static_size; ++j) {
             out[i++] = in[j];
         }
- return out;
+ BOOST_ASSERT(i == OutputType::static_size);
+ }
+
+ template <typename InputType, typename OutputType>
+ static void pack_n(InputType const *in, size_t in_n,
+ OutputType *out, size_t out_n) {
+ BOOST_ASSERT(in_n == out_n);
+ unsigned i = 0;
+ for (unsigned j = 0; j < in_n; ++j) {
+ out[i++] = in[j];
+ }
+ BOOST_ASSERT(i == out_n);
     }
 
 };
@@ -55,18 +65,28 @@
 
     BOOST_STATIC_ASSERT(InputBits % OutputBits == 0);
 
- template <typename OutputType, typename InputType>
- static OutputType pack_array(InputType const &in) {
+ template <typename InputType, typename OutputType>
+ static void pack_array(InputType const &in, OutputType &out) {
         BOOST_STATIC_ASSERT(OutputType::static_size*OutputBits ==
                             InputType::static_size*InputBits);
- OutputType out;
         unsigned i = 0;
         for (unsigned j = 0; j < InputType::static_size; ++j) {
             detail::exploder<Endianness, InputBits, OutputBits>
              ::explode1_array(out, i, in[j]);
         }
         BOOST_ASSERT(i == OutputType::static_size);
- return out;
+ }
+
+ template <typename InputType, typename OutputType>
+ static void pack_n(InputType const *in, size_t in_n,
+ OutputType *out, size_t out_n) {
+ BOOST_ASSERT(InputBits*in_n == OutputBits*out_n);
+ unsigned i = 0;
+ for (unsigned j = 0; j < in_n; ++j) {
+ detail::exploder<Endianness, InputBits, OutputBits>
+ ::explode1_array(out, i, in[j]);
+ }
+ BOOST_ASSERT(i == out_n);
     }
 
 };
@@ -79,22 +99,71 @@
 
     BOOST_STATIC_ASSERT(OutputBits % InputBits == 0);
 
- template <typename OutputType, typename InputType>
- static OutputType pack_array(InputType const &in) {
+ template <typename InputType, typename OutputType>
+ static void pack_array(InputType const &in, OutputType &out) {
         BOOST_STATIC_ASSERT(OutputType::static_size*OutputBits ==
                             InputType::static_size*InputBits);
- OutputType out;
         unsigned i = 0;
         for (unsigned j = 0; j < OutputType::static_size; ++j) {
             detail::imploder<Endianness, InputBits, OutputBits>
              ::implode1_array(in, i, out[j] = 0);
         }
         BOOST_ASSERT(i == InputType::static_size);
- return out;
+ }
+
+ template <typename InputType, typename OutputType>
+ static void pack_n(InputType const *in, size_t in_n,
+ OutputType *out, size_t out_n) {
+ BOOST_ASSERT(InputBits*in_n == OutputBits*out_n);
+ unsigned i = 0;
+ for (unsigned j = 0; j < out_n; ++j) {
+ detail::imploder<Endianness, InputBits, OutputBits>
+ ::implode1_array(in, i, out[j] = 0);
+ }
+ BOOST_ASSERT(i == in_n);
     }
 
 };
 
+template <int UnitBits, int InputBits, int OutputBits>
+struct real_packer<stream_endian::host_unit<UnitBits>,
+ InputBits, OutputBits,
+ true, true> {
+
+ BOOST_STATIC_ASSERT(!(InputBits % UnitBits) &&
+ !(OutputBits % UnitBits));
+
+ template <typename InputType, typename OutputType>
+ static void pack_array(InputType const &in, OutputType &out) {
+ BOOST_STATIC_ASSERT(OutputType::static_size*OutputBits ==
+ InputType::static_size*InputBits);
+ std::memcpy(&out[0], &in[0], InputType::static_size*InputBits/CHAR_BIT);
+ }
+
+ template <typename InputType, typename OutputType>
+ static void pack_n(InputType const *in, size_t in_n,
+ OutputType *out, size_t out_n) {
+ BOOST_ASSERT(sizeof(InputType)*in_n == sizeof(OutputType)*out_n);
+ BOOST_ASSERT(InputBits*in_n == OutputBits*out_n);
+ std::memcpy(&out[0], &in[0], InputBits*in_n/CHAR_BIT);
+ }
+
+};
+template <int UnitBits, int InputBits, int OutputBits>
+struct real_packer<stream_endian::host_unit<UnitBits>,
+ InputBits, OutputBits,
+ false, true>
+ : real_packer<stream_endian::host_unit<UnitBits>,
+ InputBits, OutputBits,
+ true, true> {};
+template <int UnitBits, int InputBits, int OutputBits>
+struct real_packer<stream_endian::host_unit<UnitBits>,
+ InputBits, OutputBits,
+ true, false>
+ : real_packer<stream_endian::host_unit<UnitBits>,
+ InputBits, OutputBits,
+ true, true> {};
+
 // Forward if nothing better matches
 template <typename Endianness,
           int InputBits, int OutputBits,
@@ -150,7 +219,15 @@
           typename T1, typename T2>
 void pack(T1 const &in, T2 &out) {
     typedef packer<Endianness, InBits, OutBits> packer_type;
- out = packer_type::template pack_array<T2>(in);
+ packer_type::pack_array(in, out);
+}
+
+template <typename Endianness,
+ int InBits, int OutBits,
+ typename T1, typename T2>
+void pack(T1 const *in, size_t in_n, T2 *out, size_t out_n) {
+ typedef packer<Endianness, InBits, OutBits> packer_type;
+ packer_type::pack_n(in, in_n, out, out_n);
 }
 
 } // namespace hash

Modified: sandbox/hash/boost/hash/stream_preprocessor.hpp
==============================================================================
--- sandbox/hash/boost/hash/stream_preprocessor.hpp (original)
+++ sandbox/hash/boost/hash/stream_preprocessor.hpp 2010-04-30 23:23:25 EDT (Fri, 30 Apr 2010)
@@ -42,8 +42,8 @@
     static unsigned const value_bits = value_bits_;
     typedef typename uint_t<value_bits>::least value_type;
     BOOST_STATIC_ASSERT(word_bits % value_bits == 0);
- typedef array<value_type, block_bits/value_bits>
- value_array_type;
+ static unsigned const block_values = block_bits/value_bits;
+ typedef array<value_type, block_values> value_array_type;
 
   private:
 
@@ -109,6 +109,33 @@
         }
         return *this;
     }
+ template <typename T>
+ stream_preprocessor &update_n(T const *p_, size_t n) {
+ BOOST_STATIC_ASSERT(sizeof(T) == sizeof(value_type));
+ value_type const *p = reinterpret_cast<value_type const *>(p_);
+#ifndef BOOST_HASH_NO_OPTIMIZATION
+ for ( ; n && (seen % block_bits); --n, ++p) {
+ update(*p);
+ }
+ for ( ; n >= block_values; n -= block_values, p += block_values) {
+ // Convert the input into words
+ block_type block;
+ pack<endian, value_bits, word_bits>(p, block_values,
+ &block[0], block_words);
+
+ // Process the block
+ block_hash.update(block);
+ seen += block_bits;
+
+ // Reset seen if we don't need to track the length
+ if (!length_bits) seen = 0;
+ }
+#endif
+ for ( ; n; --n, ++p) {
+ update(*p);
+ }
+ return *this;
+ }
     digest_type end_message() {
         length_type length = seen;
 

Added: sandbox/hash/libs/hash/example/hashsum.cpp
==============================================================================
--- (empty file)
+++ sandbox/hash/libs/hash/example/hashsum.cpp 2010-04-30 23:23:25 EDT (Fri, 30 Apr 2010)
@@ -0,0 +1,73 @@
+
+#ifndef HASH
+#define HASH cubehash<80*4>
+#endif
+
+#include <boost/hash.hpp>
+
+#define XSTR(s) STR(s)
+#define STR(s) #s
+
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <string>
+
+#ifdef USE_MMAP
+#include <fcntl.h>
+#include <sys/mman.h>
+#endif
+
+typedef boost::hash::HASH hash_policy;
+
+hash_policy::digest_type
+hash_streambuf(std::streambuf *buf) {
+ return boost::hash::compute_digest<hash_policy>(
+ std::istreambuf_iterator<char>(buf),
+ std::istreambuf_iterator<char>()
+ );
+}
+
+hash_policy::digest_type
+hash_memory(void *buf, size_t n) {
+ return boost::hash::compute_digest<hash_policy>((char*)buf, n);
+}
+
+std::ostream &
+do_istream(std::ostream &sink, std::string const &fn, std::istream &source) {
+ return sink << hash_streambuf(source.rdbuf())
+ << " " << fn << std::endl;
+}
+
+#ifdef USE_MMAP
+std::ostream &
+do_memory(std::ostream &sink, std::string const &fn, void *buf, size_t n) {
+ return sink << hash_memory(buf, n)
+ << " " << fn << std::endl;
+}
+#endif
+
+std::ostream &
+do_file(std::ostream &sink, std::string const &fn) {
+#ifdef USE_MMAP
+ int fd = open(fn.c_str(), O_RDONLY | O_NOATIME);
+ size_t len = lseek(fd, 0, SEEK_END);
+ void *p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
+ return do_memory(sink, fn, p, len);
+#else
+ std::ifstream f(fn.c_str());
+ return do_istream(sink, fn, f);
+#endif
+}
+
+int main(int argc, char **argv) {
+ std::cerr << "Using boost::hash::" XSTR(HASH) "\n";
+ if (argc < 2) {
+ do_istream(std::cout, "-", std::cin);
+ } else {
+ while (--argc) {
+ do_file(std::cout, *++argv);
+ }
+ }
+}
+


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk