Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r73485 - in sandbox/bloom_filter/trunk/boost/bloom_filter: . detail
From: cpp.cabrera_at_[hidden]
Date: 2011-08-02 05:33:04


Author: alejandro
Date: 2011-08-02 05:33:03 EDT (Tue, 02 Aug 2011)
New Revision: 73485
URL: http://svn.boost.org/trac/boost/changeset/73485

Log:
Re-implemented counting_apply_hash to remove all code duplication. Updated counting_bloom_filter to use new version.
Text files modified:
   sandbox/bloom_filter/trunk/boost/bloom_filter/counting_bloom_filter.hpp | 65 ++++++----
   sandbox/bloom_filter/trunk/boost/bloom_filter/detail/counting_apply_hash.hpp | 230 +++++++++++++++------------------------
   2 files changed, 130 insertions(+), 165 deletions(-)

Modified: sandbox/bloom_filter/trunk/boost/bloom_filter/counting_bloom_filter.hpp
==============================================================================
--- sandbox/bloom_filter/trunk/boost/bloom_filter/counting_bloom_filter.hpp (original)
+++ sandbox/bloom_filter/trunk/boost/bloom_filter/counting_bloom_filter.hpp 2011-08-02 05:33:03 EDT (Tue, 02 Aug 2011)
@@ -67,24 +67,23 @@
       // a slot is one element position in the array
       // a bin is a segment of a slot
       static const size_t slot_bits = sizeof(Block) * 8;
- static const size_t bins_per_slot = slot_bits / BitsPerBin;
       static const size_t bin_bits = NumBins * BitsPerBin;
       static const size_t array_size = bin_bits / slot_bits + 1;
- static const size_t effective_num_bins = array_size * bins_per_slot;
- static const size_t mask =
- static_cast<Block>(0 - 1) >> (slot_bits - BitsPerBin);
 
     public:
       typedef T value_type;
       typedef T key_type;
       typedef HashFunctions hash_function_type;
       typedef Block block_type;
+ typedef counting_bloom_filter<T, NumBins, BitsPerBin,
+ HashFunctions, Block> this_type;
 
       typedef boost::array<Block, array_size> bucket_type;
       typedef typename bucket_type::iterator bucket_iterator;
- typedef typename bucket_type::const_iterator bucket_const_iterator;
+ typedef typename bucket_type::const_iterator bucket_const_iterator;
 
     public:
+ //! constructors
       counting_bloom_filter()
       {
         this->clear();
@@ -112,6 +111,27 @@
       }
 #endif
 
+ //! meta functions
+ static BOOST_CONSTEXPR size_t num_bins()
+ {
+ return NumBins;
+ }
+
+ static BOOST_CONSTEXPR size_t bits_per_bin()
+ {
+ return BitsPerBin;
+ }
+
+ static BOOST_CONSTEXPR size_t bins_per_slot()
+ {
+ return sizeof(block_type) * 8 / BitsPerBin;
+ }
+
+ static BOOST_CONSTEXPR size_t mask()
+ {
+ return static_cast<Block>(0 - 1) >> (slot_bits - BitsPerBin);
+ }
+
       static BOOST_CONSTEXPR size_t bit_capacity()
       {
         return NumBins * BitsPerBin;
@@ -120,7 +140,7 @@
       static BOOST_CONSTEXPR size_t num_hash_functions()
       {
         return mpl::size<HashFunctions>::value;
- };
+ }
 
       double false_positive_rate() const
       {
@@ -130,9 +150,9 @@
         static const double e =
           2.718281828459045235360287471352662497757247093699959574966;
         return std::pow(1 - std::pow(e, -k * n / m), k);
- };
+ }
 
- // returns the number of bins that have at least 1 bit set
+ //? returns the number of bins that have at least 1 bit set
       size_t count() const
       {
         size_t ret = 0;
@@ -140,9 +160,9 @@
         for (bucket_const_iterator i = this->bits.begin(),
                end = this->bits.end();
              i != end; ++i) {
- for (size_t bin = 0; bin < bins_per_slot; ++bin) {
- size_t offset_bits = bin * BitsPerBin;
- size_t target_bits = (*i >> offset_bits) & mask;
+ for (size_t bin = 0; bin < this->bins_per_slot(); ++bin) {
+ const size_t offset_bits = bin * BitsPerBin;
+ const size_t target_bits = (*i >> offset_bits) & this->mask();
 
             if (target_bits > 0)
               ++ret;
@@ -150,20 +170,18 @@
         }
 
         return ret;
- };
+ }
 
       bool empty() const
       {
         return this->count() == 0;
       }
 
+ //! core ops
       void insert(const T& t)
       {
- static const unsigned N = mpl::size<HashFunctions>::value - 1;
- detail::counting_apply_hash<N, T, NumBins,
- BitsPerBin, HashFunctions,
- Block, array_size,
- bins_per_slot>::insert(t, this->bits);
+ static const unsigned N = boost::mpl::size<hash_function_type>::value - 1;
+ detail::counting_apply_hash<N, this_type>::insert(t, this->bits);
       }
 
       template <typename InputIterator>
@@ -176,11 +194,8 @@
                            
       void remove(const T& t)
       {
- static const unsigned N = mpl::size<HashFunctions>::value - 1;
- detail::counting_apply_hash<N, T, NumBins,
- BitsPerBin, HashFunctions,
- Block, array_size,
- bins_per_slot>::remove(t, this->bits);
+ static const unsigned N = boost::mpl::size<hash_function_type>::value - 1;
+ detail::counting_apply_hash<N, this_type>::remove(t, this->bits);
       }
       
       template <typename InputIterator>
@@ -194,10 +209,8 @@
       bool probably_contains(const T& t) const
       {
         static const unsigned N = mpl::size<HashFunctions>::value - 1;
- return detail::counting_apply_hash<N, T, NumBins,
- BitsPerBin, HashFunctions,
- Block, array_size,
- bins_per_slot>::contains(t, this->bits);
+ return detail::counting_apply_hash<N, this_type>::contains(t,
+ this->bits);
       }
 
       void clear()

Modified: sandbox/bloom_filter/trunk/boost/bloom_filter/detail/counting_apply_hash.hpp
==============================================================================
--- sandbox/bloom_filter/trunk/boost/bloom_filter/detail/counting_apply_hash.hpp (original)
+++ sandbox/bloom_filter/trunk/boost/bloom_filter/detail/counting_apply_hash.hpp 2011-08-02 05:33:03 EDT (Tue, 02 Aug 2011)
@@ -13,9 +13,6 @@
 #ifndef BOOST_BLOOM_FILTER_COUNTING_APPLY_HASH_HPP
 #define BOOST_BLOOM_FILTER_COUNTING_APPLY_HASH_HPP
 
-#include <iostream>
-
-#include <boost/array.hpp>
 #include <boost/mpl/at.hpp>
 
 #include <boost/bloom_filter/detail/exceptions.hpp>
@@ -24,161 +21,116 @@
   namespace bloom_filters {
     namespace detail {
 
- template <typename T,
- typename Hash,
- typename Bitset,
- size_t NumBins, size_t BinsPerSlot,
- size_t BitsPerBin, size_t Mask>
- static size_t
- get_bits(const T& t, const Bitset slots)
- {
- static Hash hasher;
-
- const size_t hash_val = hasher(t) % NumBins;
- const size_t pos = hash_val / BinsPerSlot;
- const size_t offset_bits = (hash_val % BinsPerSlot) * BitsPerBin;
-
- return ((slots[pos] >> offset_bits) & Mask);
- }
-
- template <size_t N,
- typename T,
- size_t NumBins,
- size_t BitsPerBin,
- class HashFunctions,
- typename Block,
- size_t ArraySize,
- size_t BinsPerSlot>
- struct counting_apply_hash
- {
- static const size_t MASK =
- static_cast<Block>(0 - 1) >> (sizeof(Block) * 8 - BitsPerBin);
-
- static void insert(const T& t, boost::array<Block, ArraySize>& _slots)
- {
- typedef typename boost::mpl::at_c<HashFunctions, N>::type Hash;
- static Hash hasher;
-
- const size_t hash_val = hasher(t) % NumBins;
- const size_t pos = hash_val / BinsPerSlot;
- const size_t offset_bits = (hash_val % BinsPerSlot) * BitsPerBin;
- size_t target_bits = (_slots[pos] >> offset_bits) & MASK;
- ++target_bits;
+ struct decrement {
+ size_t operator()(const size_t val, const size_t limit) {
+ if (val == limit)
+ throw bin_underflow_exception();
 
- if (target_bits == (1 << BitsPerBin))
+ return val - 1;
+ }
+ };
+
+ struct increment {
+ size_t operator()(const size_t val, const size_t limit) {
+ if ((val+1) == limit)
             throw bin_overflow_exception();
-
- _slots[pos] |= (target_bits << offset_bits);
-
- counting_apply_hash<N-1, T, NumBins,
- BitsPerBin, HashFunctions,
- Block, ArraySize, BinsPerSlot>::insert(t, _slots);
- }
-
- static void remove(const T& t, boost::array<Block, ArraySize>& _slots)
- {
- typedef typename boost::mpl::at_c<HashFunctions, N>::type Hash;
- static Hash hasher;
-
- const size_t hash_val = hasher(t) % NumBins;
- const size_t pos = hash_val / BinsPerSlot;
- const size_t offset_bits = (hash_val % BinsPerSlot) * BitsPerBin;
- size_t target_bits = (_slots[pos] >> offset_bits) & MASK;
 
- if (target_bits == 0)
- throw bin_underflow_exception();
-
- --target_bits;
+ return val+1;
+ }
+ };
 
- _slots[pos] |= (target_bits << offset_bits);
+ template <size_t N, class CBF, class Op = void>
+ struct BloomOp {
+ typedef typename boost::mpl::at_c<typename CBF::hash_function_type,
+ N>::type Hash;
+
+ BloomOp(const typename CBF::value_type& t,
+ const typename CBF::bucket_type& slots)
+ :
+ hash_val(hasher(t) % CBF::num_bins()),
+ pos(hash_val / CBF::bins_per_slot()),
+ offset_bits((hash_val % CBF::bins_per_slot()) * CBF::bits_per_bin()),
+ target_bits((slots[pos] >> offset_bits) & CBF::mask())
+ {}
+
+ void update(const typename CBF::value_type& t,
+ typename CBF::bucket_type& slots,
+ const size_t limit) const {
+ static Op op;
+
+ const size_t final_bits = op(target_bits, limit);
+ slots[pos] &= ~(CBF::mask() << offset_bits);
+ slots[pos] |= (final_bits << offset_bits);
+ }
 
- counting_apply_hash<N-1, T, NumBins,
- BitsPerBin, HashFunctions,
- Block, ArraySize, BinsPerSlot>::remove(t, _slots);
- }
-
- static bool contains(const T& t, const boost::array<Block, ArraySize>& _slots)
- {
- typedef typename boost::mpl::at_c<HashFunctions, N>::type Hash;
- static Hash hasher;
-
- const size_t hash_val = hasher(t) % NumBins;
- const size_t pos = hash_val / BinsPerSlot;
- const size_t offset_bits = (hash_val % BinsPerSlot) * BitsPerBin;
- const size_t target_bits = (_slots[pos] >> offset_bits) & MASK;
-
- return ((target_bits != 0) &&
- counting_apply_hash<N-1, T, NumBins,
- BitsPerBin, HashFunctions,
- Block, ArraySize, BinsPerSlot>::contains(t, _slots));
- }
+ bool check() const {
+ return (target_bits != 0);
+ }
+
+ Hash hasher;
+ const size_t hash_val;
+ const size_t pos;
+ const size_t offset_bits;
+ const size_t target_bits;
       };
 
- template <typename T,
- size_t NumBins,
- size_t BitsPerBin,
- class HashFunctions,
- typename Block,
- size_t ArraySize,
- size_t BinsPerSlot>
- struct counting_apply_hash<0, T, NumBins, BitsPerBin, HashFunctions,
- Block, ArraySize, BinsPerSlot>
+ // CBF : Counting Bloom Filter
+ template <size_t N,
+ class CBF>
+ struct counting_apply_hash
       {
- static const size_t MASK =
- static_cast<Block>(0 - 1) >> (sizeof(Block) * 8 - BitsPerBin);
-
- static void insert(const T& t,
- boost::array<Block, ArraySize>& _slots)
+ static void insert(const typename CBF::value_type& t,
+ typename CBF::bucket_type& slots)
         {
- typedef typename boost::mpl::at_c<HashFunctions, 0>::type Hash;
- static Hash hasher;
-
- const size_t hash_val = hasher(t) % NumBins;
- const size_t pos = hash_val / BinsPerSlot;
- const size_t offset_bits = (hash_val % BinsPerSlot) * BitsPerBin;
- size_t target_bits = (_slots[pos] >> offset_bits) & MASK;
+ BloomOp<N, CBF, increment> inserter(t, slots);
+ inserter.update(t, slots, (1 << CBF::bits_per_bin()));
 
- ++target_bits;
-
- if (target_bits == (1 << BitsPerBin))
- throw bin_overflow_exception();
-
- _slots[pos] &= ~(MASK << offset_bits);
- _slots[pos] |= (target_bits << offset_bits);
- }
-
- static void remove(const T& t,
- boost::array<Block, ArraySize>& _slots)
- {
- typedef typename boost::mpl::at_c<HashFunctions, 0>::type Hash;
- static Hash hasher;
-
- const size_t hash_val = hasher(t) % NumBins;
- const size_t pos = hash_val / BinsPerSlot;
- const size_t offset_bits = (hash_val % BinsPerSlot) * BitsPerBin;
- size_t target_bits = (_slots[pos] >> offset_bits) & MASK;
+ counting_apply_hash<N-1, CBF>::insert(t, slots);
+ }
 
- if (target_bits == 0)
- throw bin_underflow_exception();
+ static void remove(const typename CBF::value_type& t,
+ typename CBF::bucket_type& slots)
+ {
+ BloomOp<N, CBF, decrement> remover(t, slots);
+ remover.update(t, slots, 0);
 
- --target_bits;
+ counting_apply_hash<N-1, CBF>::remove(t, slots);
+ }
 
- _slots[pos] &= ~(MASK << offset_bits);
- _slots[pos] |= (target_bits << offset_bits);
+ static bool contains(const typename CBF::value_type& t,
+ const typename CBF::bucket_type& slots)
+ {
+ BloomOp<N, CBF> checker(t, slots);
+ return (checker.check() &&
+ counting_apply_hash<N-1, CBF>::contains(t, slots));
         }
+ };
 
- static bool contains(const T& t,
- const boost::array<Block, ArraySize>& _slots)
+ template <class CBF>
+ struct counting_apply_hash<0, CBF>
+ {
+ static void insert(const typename CBF::value_type& t,
+ typename CBF::bucket_type& slots)
         {
- typedef typename boost::mpl::at_c<HashFunctions, 0>::type Hash;
+ BloomOp<0, CBF, increment> inserter(t, slots);
+ inserter.update(t, slots, (1 << CBF::bits_per_bin()));
+ }
 
- size_t target_bits =
- get_bits<T, Hash, boost::array<Block, ArraySize>,
- NumBins, BinsPerSlot, BitsPerBin, MASK>(t, _slots);
+ static void remove(const typename CBF::value_type& t,
+ typename CBF::bucket_type& slots)
+ {
+ BloomOp<0, CBF, decrement> remover(t, slots);
+ remover.update(t, slots, 0);
+ }
 
- return (target_bits != 0);
- }
+ static bool contains(const typename CBF::value_type& t,
+ const typename CBF::bucket_type& slots)
+ {
+ BloomOp<0, CBF> checker(t, slots);
+ return (checker.check());
+ }
       };
+
     } // namespace detail
   } // namespace bloom_filter
 } // namespace boost


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk