Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r73653 - in sandbox/bloom_filter/trunk/boost/bloom_filter: . detail
From: cpp.cabrera_at_[hidden]
Date: 2011-08-11 10:03:39


Author: alejandro
Date: 2011-08-11 10:03:37 EDT (Thu, 11 Aug 2011)
New Revision: 73653
URL: http://svn.boost.org/trac/boost/changeset/73653

Log:
New data structure:

  - twohash_counting_bloom_filter:
    * Cloned from counting_bloom_filter, uses the same interface
      with similar template parameters to other twohash variants.
    * Removed union/intersect operations - experimental operations
      are not useful to expose.
    * Implemented an apply_hash algorithm suitable for
      twohash_counting_bloom_filter.

General change to all Bloom filters:

  - Added function data() to give access to raw bits.
Added:
   sandbox/bloom_filter/trunk/boost/bloom_filter/detail/twohash_counting_apply_hash.hpp (contents, props changed)
   sandbox/bloom_filter/trunk/boost/bloom_filter/twohash_counting_bloom_filter.hpp
      - copied, changed from r73639, /sandbox/bloom_filter/trunk/boost/bloom_filter/counting_bloom_filter.hpp
Text files modified:
   sandbox/bloom_filter/trunk/boost/bloom_filter/basic_bloom_filter.hpp | 6
   sandbox/bloom_filter/trunk/boost/bloom_filter/counting_bloom_filter.hpp | 6
   sandbox/bloom_filter/trunk/boost/bloom_filter/dynamic_bloom_filter.hpp | 6
   sandbox/bloom_filter/trunk/boost/bloom_filter/dynamic_counting_bloom_filter.hpp | 6
   sandbox/bloom_filter/trunk/boost/bloom_filter/twohash_basic_bloom_filter.hpp | 6
   sandbox/bloom_filter/trunk/boost/bloom_filter/twohash_counting_bloom_filter.hpp | 244 +++++++++++++++++++--------------------
   sandbox/bloom_filter/trunk/boost/bloom_filter/twohash_dynamic_basic_bloom_filter.hpp | 6
   7 files changed, 155 insertions(+), 125 deletions(-)

Modified: sandbox/bloom_filter/trunk/boost/bloom_filter/basic_bloom_filter.hpp
==============================================================================
--- sandbox/bloom_filter/trunk/boost/bloom_filter/basic_bloom_filter.hpp (original)
+++ sandbox/bloom_filter/trunk/boost/bloom_filter/basic_bloom_filter.hpp 2011-08-11 10:03:37 EDT (Thu, 11 Aug 2011)
@@ -84,6 +84,12 @@
         return this->count() == 0;
       }
 
+ const bitset_type&
+ data() const
+ {
+ return this->bits;
+ }
+
       void insert(const T& t) {
         static const unsigned N = mpl::size<HashFunctions>::value - 1;
         detail::apply_hash<N,

Modified: sandbox/bloom_filter/trunk/boost/bloom_filter/counting_bloom_filter.hpp
==============================================================================
--- sandbox/bloom_filter/trunk/boost/bloom_filter/counting_bloom_filter.hpp (original)
+++ sandbox/bloom_filter/trunk/boost/bloom_filter/counting_bloom_filter.hpp 2011-08-11 10:03:37 EDT (Thu, 11 Aug 2011)
@@ -177,6 +177,12 @@
         return this->count() == 0;
       }
 
+ const bucket_type&
+ data() const
+ {
+ return this->bits;
+ }
+
       //! core ops
       void insert(const T& t)
       {

Added: sandbox/bloom_filter/trunk/boost/bloom_filter/detail/twohash_counting_apply_hash.hpp
==============================================================================
--- (empty file)
+++ sandbox/bloom_filter/trunk/boost/bloom_filter/detail/twohash_counting_apply_hash.hpp 2011-08-11 10:03:37 EDT (Thu, 11 Aug 2011)
@@ -0,0 +1,134 @@
+//////////////////////////////////////////////////////////////////////////////
+//
+// (C) Copyright Alejandro Cabrera 2011.
+// Distributed under the Boost
+// Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or
+// copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// See http://www.boost.org/libs/bloom_filter for documentation.
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#ifndef BOOST_BLOOM_FILTER_TWOHASH_COUNTING_APPLY_HASH_HPP
+#define BOOST_BLOOM_FILTER_TWOHASH_COUNTING_APPLY_HASH_HPP
+
+#include <boost/bloom_filter/detail/exceptions.hpp>
+
+namespace boost {
+ namespace bloom_filters {
+ namespace detail {
+
+ struct decrement {
+ size_t operator()(const size_t val, const size_t limit) {
+ if (val == limit)
+ throw bin_underflow_exception();
+
+ return val - 1;
+ }
+ };
+
+ struct increment {
+ size_t operator()(const size_t val, const size_t limit) {
+ if (val == limit)
+ throw bin_overflow_exception();
+
+ return val + 1;
+ }
+ };
+
+ template <size_t N, typename CBF, typename Op = void>
+ struct BloomOp {
+ typedef typename CBF::hash_function1_type hash_function1_type;
+ typedef typename CBF::hash_function2_type hash_function2_type;
+ typedef typename CBF::extension_function_type extension_function_type;
+
+ BloomOp(const typename CBF::value_type& t)
+ : hash1_val(hash1(t)),
+ hash2_val(hash2(t))
+ {
+ }
+
+ void update(typename CBF::bucket_type& slots,
+ const size_t num_bins,
+ const size_t limit)
+ {
+ static Op op;
+
+ for (size_t i = 0; i < N; ++i) {
+ const size_t hash =
+ (hash1_val + i * hash2_val + ext(i)) % num_bins;
+ const size_t pos = hash / CBF::bins_per_slot();
+ const size_t offset_bits =
+ (hash % CBF::bins_per_slot()) * CBF::bits_per_bin();
+ const size_t target_bits =
+ (slots[pos] >> offset_bits) & CBF::mask();
+
+ const size_t final_bits = op(target_bits, limit);
+ slots[pos] &= ~(CBF::mask() << offset_bits);
+ slots[pos] |= (final_bits << offset_bits);
+ }
+ }
+
+ bool check(const typename CBF::bucket_type& slots,
+ const size_t num_bins)
+ {
+ for (size_t i = 0; i < N; ++i) {
+ const size_t hash =
+ (hash1_val + i * hash2_val + ext(i)) % num_bins;
+ const size_t pos = hash / CBF::bins_per_slot();
+ const size_t offset_bits =
+ (hash % CBF::bins_per_slot()) * CBF::bits_per_bin();
+ const size_t target_bits =
+ (slots[pos] >> offset_bits) & CBF::mask();
+
+ if (target_bits == 0)
+ return false;
+ }
+
+ return true;
+ }
+
+ size_t hash1_val;
+ size_t hash2_val;
+ hash_function1_type hash1;
+ hash_function2_type hash2;
+ extension_function_type ext;
+ };
+
+ // CBF : Counting Bloom Filter
+ template <size_t N,
+ class CBF>
+ struct twohash_counting_apply_hash
+ {
+ static void insert(const typename CBF::value_type& t,
+ typename CBF::bucket_type& slots,
+ const size_t num_bins)
+ {
+ BloomOp<N, CBF, increment> inserter(t);
+ inserter.update(slots, num_bins,
+ (static_cast<size_t>(1) << CBF::bits_per_bin()) - 1);
+ }
+
+ static void remove(const typename CBF::value_type& t,
+ typename CBF::bucket_type& slots,
+ const size_t num_bins)
+ {
+ BloomOp<N, CBF, decrement> remover(t);
+ remover.update(slots, num_bins, 0);
+ }
+
+ static bool contains(const typename CBF::value_type& t,
+ const typename CBF::bucket_type& slots,
+ const size_t num_bins)
+ {
+ BloomOp<N, CBF> checker(t);
+ return checker.check(slots, num_bins);
+
+ }
+ };
+
+ } // namespace detail
+ } // namespace bloom_filter
+} // namespace boost
+#endif

Modified: sandbox/bloom_filter/trunk/boost/bloom_filter/dynamic_bloom_filter.hpp
==============================================================================
--- sandbox/bloom_filter/trunk/boost/bloom_filter/dynamic_bloom_filter.hpp (original)
+++ sandbox/bloom_filter/trunk/boost/bloom_filter/dynamic_bloom_filter.hpp 2011-08-11 10:03:37 EDT (Thu, 11 Aug 2011)
@@ -84,6 +84,12 @@
         return this->count() == 0;
       }
 
+ const bitset_type&
+ data() const
+ {
+ return this->bits;
+ }
+
       // core operations
       void insert(const T& t) {
         static const unsigned N = mpl::size<HashFunctions>::value - 1;

Modified: sandbox/bloom_filter/trunk/boost/bloom_filter/dynamic_counting_bloom_filter.hpp
==============================================================================
--- sandbox/bloom_filter/trunk/boost/bloom_filter/dynamic_counting_bloom_filter.hpp (original)
+++ sandbox/bloom_filter/trunk/boost/bloom_filter/dynamic_counting_bloom_filter.hpp 2011-08-11 10:03:37 EDT (Thu, 11 Aug 2011)
@@ -173,6 +173,12 @@
         return this->count() == 0;
       }
 
+ const bucket_type&
+ data() const
+ {
+ return this->bits;
+ }
+
       //! core ops
       void insert(const T& t)
       {

Modified: sandbox/bloom_filter/trunk/boost/bloom_filter/twohash_basic_bloom_filter.hpp
==============================================================================
--- sandbox/bloom_filter/trunk/boost/bloom_filter/twohash_basic_bloom_filter.hpp (original)
+++ sandbox/bloom_filter/trunk/boost/bloom_filter/twohash_basic_bloom_filter.hpp 2011-08-11 10:03:37 EDT (Thu, 11 Aug 2011)
@@ -111,6 +111,12 @@
         return this->count() == 0;
       }
 
+ const bitset_type&
+ data() const
+ {
+ return this->bits;
+ }
+
       //* core ops
       void insert(const T& t)
       {

Copied: sandbox/bloom_filter/trunk/boost/bloom_filter/twohash_counting_bloom_filter.hpp (from r73639, /sandbox/bloom_filter/trunk/boost/bloom_filter/counting_bloom_filter.hpp)
==============================================================================
--- /sandbox/bloom_filter/trunk/boost/bloom_filter/counting_bloom_filter.hpp (original)
+++ sandbox/bloom_filter/trunk/boost/bloom_filter/twohash_counting_bloom_filter.hpp 2011-08-11 10:03:37 EDT (Thu, 11 Aug 2011)
@@ -10,23 +10,22 @@
 //
 //////////////////////////////////////////////////////////////////////////////
 
-#ifndef BOOST_BLOOM_FILTER_COUNTING_BLOOM_FILTER_HPP
-#define BOOST_BLOOM_FILTER_COUNTING_BLOOM_FILTER_HPP 1
+#ifndef BOOST_BLOOM_FILTER_TWOHASH_COUNTING_BLOOM_FILTER_HPP
+#define BOOST_BLOOM_FILTER_TWOHASH_COUNTING_BLOOM_FILTER_HPP 1
 
 #include <cmath>
 
 #include <boost/config.hpp>
 #include <boost/array.hpp>
 
-#include <boost/mpl/vector.hpp>
-#include <boost/mpl/size.hpp>
-
 #include <boost/static_assert.hpp>
 #include <boost/type_traits/is_integral.hpp>
 #include <boost/type_traits/is_unsigned.hpp>
 
-#include <boost/bloom_filter/detail/counting_apply_hash.hpp>
+#include <boost/bloom_filter/detail/twohash_counting_apply_hash.hpp>
+#include <boost/bloom_filter/detail/extenders.hpp>
 #include <boost/bloom_filter/hash/default.hpp>
+#include <boost/bloom_filter/hash/murmurhash3.hpp>
 
 #ifndef BOOST_NO_0X_HDR_INITIALIZER_LIST
 #include <initializer_list>
@@ -37,9 +36,13 @@
     template <typename T,
               size_t NumBins,
               size_t BitsPerBin = 4,
- class HashFunctions = mpl::vector<boost_hash<T, 0> >,
+ size_t HashValues = 2,
+ size_t ExpectedInsertionCount = 0,
+ class HashFunction1 = boost_hash<T, 0>,
+ class HashFunction2 = murmurhash3<T>,
+ class ExtensionFunction = detail::square,
               typename Block = size_t>
- class counting_bloom_filter {
+ class twohash_counting_bloom_filter {
 
       // Block needs to be an integral type
       BOOST_STATIC_ASSERT( boost::is_integral<Block>::value == true);
@@ -70,28 +73,37 @@
       static const size_t bin_bits = NumBins * BitsPerBin;
       static const size_t array_size = bin_bits / slot_bits + 1;
 
+
     public:
       typedef T value_type;
       typedef T key_type;
- typedef HashFunctions hash_function_type;
+ typedef HashFunction1 hash_function1_type;
+ typedef HashFunction2 hash_function2_type;
+ typedef ExtensionFunction extension_function_type;
       typedef Block block_type;
- typedef counting_bloom_filter<T, NumBins, BitsPerBin,
- HashFunctions, Block> this_type;
+ typedef twohash_counting_bloom_filter<T, NumBins, BitsPerBin, HashValues,
+ ExpectedInsertionCount,
+ HashFunction1, HashFunction2,
+ ExtensionFunction, Block> this_type;
 
       typedef boost::array<Block, array_size> bucket_type;
       typedef typename bucket_type::iterator bucket_iterator;
       typedef typename bucket_type::const_iterator bucket_const_iterator;
 
+ private:
+ typedef detail::twohash_counting_apply_hash<HashValues - 1,
+ this_type> apply_hash_type;
+
     public:
       //! constructors
- counting_bloom_filter()
+ twohash_counting_bloom_filter()
       {
         this->clear();
       }
 
       template <typename InputIterator>
- counting_bloom_filter(const InputIterator start,
- const InputIterator end)
+ twohash_counting_bloom_filter(const InputIterator start,
+ const InputIterator end)
       {
         this->clear();
 
@@ -100,7 +112,7 @@
       }
 
 #ifndef BOOST_NO_0X_HDR_INITIALIZER_LIST
- counting_bloom_filter(const std::initializer_list<T>& ilist)
+ twohash_counting_bloom_filter(const std::initializer_list<T>& ilist)
       {
         this->clear();
 
@@ -139,7 +151,7 @@
 
       static BOOST_CONSTEXPR size_t num_hash_functions()
       {
- return mpl::size<HashFunctions>::value;
+ return HashValues;
       }
 
       double false_positive_rate() const
@@ -177,13 +189,18 @@
         return this->count() == 0;
       }
 
+ const bucket_type&
+ data() const
+ {
+ return this->bits;
+ }
+
       //! core ops
       void insert(const T& t)
       {
- static const unsigned N = boost::mpl::size<hash_function_type>::value - 1;
- detail::counting_apply_hash<N, this_type>::insert(t,
- this->bits,
- this->num_bins());
+ apply_hash_type::insert(t,
+ this->bits,
+ this->num_bins());
       }
 
       template <typename InputIterator>
@@ -196,10 +213,9 @@
 
       void remove(const T& t)
       {
- static const unsigned N = boost::mpl::size<hash_function_type>::value - 1;
- detail::counting_apply_hash<N, this_type>::remove(t,
- this->bits,
- this->num_bins());
+ apply_hash_type::remove(t,
+ this->bits,
+ this->num_bins());
       }
 
       template <typename InputIterator>
@@ -212,11 +228,9 @@
 
       bool probably_contains(const T& t) const
       {
- static const unsigned N = mpl::size<HashFunctions>::value - 1;
- return detail::counting_apply_hash<N,
- this_type>::contains(t,
- this->bits,
- this->num_bins());
+ return apply_hash_type::contains(t,
+ this->bits,
+ this->num_bins());
       }
 
       //! auxiliary ops
@@ -228,128 +242,108 @@
         }
       }
 
- void swap(counting_bloom_filter& other)
+ void swap(twohash_counting_bloom_filter& other)
       {
- counting_bloom_filter tmp = other;
+ twohash_counting_bloom_filter tmp = other;
         other = *this;
         *this = tmp;
       }
 
- //! pairwise ops
- counting_bloom_filter&
- experimental_union_assign(const counting_bloom_filter& rhs)
- {
- bucket_iterator this_end = this->bits.end();
- bucket_iterator this_start = this->bits.begin();
- bucket_const_iterator rhs_start = rhs.bits.begin();
-
- for (; this_start != this_end; ++this_start, ++rhs_start) {
- *this_start |= *rhs_start;
- }
-
- return *this;
- }
-
- counting_bloom_filter&
- experimental_intersect_assign(const counting_bloom_filter& rhs)
- {
- bucket_iterator this_end = this->bits.end();
- bucket_iterator this_start = this->bits.begin();
- bucket_const_iterator rhs_start = rhs.bits.begin();
-
- for (; this_start != this_end; ++this_start, ++rhs_start) {
- *this_start &= *rhs_start;
- }
-
- return *this;
- }
-
       // equality comparison operators
       template <typename _T, size_t _Bins, size_t _BitsPerBin,
- typename _HashFns, typename _Block>
- friend bool
- operator==(const counting_bloom_filter<_T, _Bins, _BitsPerBin,
- _HashFns, _Block>& lhs,
- const counting_bloom_filter<_T, _Bins, _BitsPerBin,
- _HashFns, _Block>& rhs);
-
- template <typename _T, size_t _Bins, size_t _BitsPerBin,
- typename _HashFns, typename _Block>
- friend bool
- operator!=(const counting_bloom_filter<_T, _Bins, _BitsPerBin,
- _HashFns, _Block>& lhs,
- const counting_bloom_filter<_T, _Bins, _BitsPerBin,
- _HashFns, _Block>& rhs);
-
+ size_t _HashValues, size_t _ExpectedInsertionCount,
+ class _HashFn1, class _HashFn2, class _ExtFn,
+ typename _Block>
+ friend bool
+ operator==(const twohash_counting_bloom_filter<_T, _Bins, _BitsPerBin,
+ _HashValues,
+ _ExpectedInsertionCount,
+ _HashFn1, _HashFn2, _ExtFn,
+ _Block>& lhs,
+ const twohash_counting_bloom_filter<_T, _Bins, _BitsPerBin,
+ _HashValues,
+ _ExpectedInsertionCount,
+ _HashFn1, _HashFn2, _ExtFn,
+ _Block>& rhs);
 
     private:
       bucket_type bits;
     };
 
- // union
- template<class T, size_t NumBins, size_t BitsPerBin, class HashFunctions,
- typename Block>
- counting_bloom_filter<T, NumBins, BitsPerBin, HashFunctions, Block>
- experimental_union(const counting_bloom_filter<T, NumBins, BitsPerBin,
- HashFunctions, Block>& lhs,
- const counting_bloom_filter<T, NumBins, BitsPerBin,
- HashFunctions, Block>& rhs)
- {
- counting_bloom_filter<T, NumBins, BitsPerBin,
- HashFunctions, Block> ret(lhs);
- ret.experimental_union_assign(rhs);
- return ret;
- }
-
- // intersection
- template<class T, size_t NumBins, size_t BitsPerBin, class HashFunctions,
- typename Block>
- counting_bloom_filter<T, NumBins, BitsPerBin, HashFunctions, Block>
- experimental_intersect(const counting_bloom_filter<T, NumBins, BitsPerBin,
- HashFunctions, Block>& lhs,
- const counting_bloom_filter<T, NumBins, BitsPerBin,
- HashFunctions, Block>& rhs)
- {
- counting_bloom_filter<T, NumBins, BitsPerBin,
- HashFunctions, Block> ret(lhs);
- ret.experimental_intersect_assign(rhs);
- return ret;
- }
-
- template<class T, size_t NumBins, size_t BitsPerBin, class HashFunctions,
- typename Block>
+ template <typename T, size_t NumBins, size_t BitsPerBin,
+ size_t HashValues, size_t ExpectedInsertionCount,
+ class HashFunction1, class HashFunction2,
+ class ExtensionFunction, typename Block>
     void
- swap(counting_bloom_filter<T, NumBins, BitsPerBin,
- HashFunctions, Block>& lhs,
- counting_bloom_filter<T, NumBins, BitsPerBin,
- HashFunctions, Block>& rhs)
-
+ swap(twohash_counting_bloom_filter<T, NumBins,
+ BitsPerBin,
+ HashValues,
+ ExpectedInsertionCount,
+ HashFunction1,
+ HashFunction2,
+ ExtensionFunction,
+ Block>& lhs,
+ twohash_counting_bloom_filter<T, NumBins,
+ BitsPerBin,
+ HashValues,
+ ExpectedInsertionCount,
+ HashFunction1,
+ HashFunction2,
+ ExtensionFunction,
+ Block>& rhs)
     {
       lhs.swap(rhs);
     }
 
- template<class T, size_t NumBins, size_t BitsPerBin, class HashFunctions,
- typename Block>
+ template <typename T, size_t NumBins, size_t BitsPerBin,
+ size_t HashValues, size_t ExpectedInsertionCount,
+ class HashFunction1, class HashFunction2,
+ class ExtensionFunction, typename Block>
     bool
- operator==(const counting_bloom_filter<T, NumBins, BitsPerBin,
- HashFunctions, Block>& lhs,
- const counting_bloom_filter<T, NumBins, BitsPerBin,
- HashFunctions, Block>& rhs)
+ operator==(const twohash_counting_bloom_filter<T, NumBins,
+ BitsPerBin,
+ HashValues,
+ ExpectedInsertionCount,
+ HashFunction1,
+ HashFunction2,
+ ExtensionFunction,
+ Block>& lhs,
+ const twohash_counting_bloom_filter<T, NumBins,
+ BitsPerBin,
+ HashValues,
+ ExpectedInsertionCount,
+ HashFunction1,
+ HashFunction2,
+ ExtensionFunction,
+ Block>& rhs)
     {
       return (lhs.bits == rhs.bits);
     }
 
- template<class T, size_t NumBins, size_t BitsPerBin, class HashFunctions,
- typename Block>
+ template <typename T, size_t NumBins, size_t BitsPerBin,
+ size_t HashValues, size_t ExpectedInsertionCount,
+ class HashFunction1, class HashFunction2,
+ class ExtensionFunction, typename Block>
     bool
- operator!=(const counting_bloom_filter<T, NumBins, BitsPerBin,
- HashFunctions, Block>& lhs,
- const counting_bloom_filter<T, NumBins, BitsPerBin,
- HashFunctions, Block>& rhs)
+ operator!=(const twohash_counting_bloom_filter<T, NumBins,
+ BitsPerBin,
+ HashValues,
+ ExpectedInsertionCount,
+ HashFunction1,
+ HashFunction2,
+ ExtensionFunction,
+ Block>& lhs,
+ const twohash_counting_bloom_filter<T, NumBins,
+ BitsPerBin,
+ HashValues,
+ ExpectedInsertionCount,
+ HashFunction1,
+ HashFunction2,
+ ExtensionFunction,
+ Block>& rhs)
     {
       return !(lhs == rhs);
     }
-
   } // namespace bloom_filter
 } // namespace boost
 #endif

Modified: sandbox/bloom_filter/trunk/boost/bloom_filter/twohash_dynamic_basic_bloom_filter.hpp
==============================================================================
--- sandbox/bloom_filter/trunk/boost/bloom_filter/twohash_dynamic_basic_bloom_filter.hpp (original)
+++ sandbox/bloom_filter/trunk/boost/bloom_filter/twohash_dynamic_basic_bloom_filter.hpp 2011-08-11 10:03:37 EDT (Thu, 11 Aug 2011)
@@ -112,6 +112,12 @@
         return this->count() == 0;
       }
 
+ const bitset_type&
+ data() const
+ {
+ return this->bits;
+ }
+
       //* core ops
       void insert(const T& t)
       {


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk