|
Boost-Commit : |
Subject: [Boost-commit] svn:boost r72927 - in sandbox/bloom_filter/trunk/boost/bloom_filter: . detail
From: cpp.cabrera_at_[hidden]
Date: 2011-07-06 13:09:21
Author: alejandro
Date: 2011-07-06 13:09:20 EDT (Wed, 06 Jul 2011)
New Revision: 72927
URL: http://svn.boost.org/trac/boost/changeset/72927
Log:
Added print statements for debug - commented out after fix. insert, remove, probably_contains, range constructor, init_list constructor, swap, global swap, quality operators, and clear verified to be working as per the regression suite. Still need to implement union, intersect, false_positive_rate.
Text files modified:
sandbox/bloom_filter/trunk/boost/bloom_filter/counting_bloom.hpp | 68 +++++++++++++++++++++++++++++--
sandbox/bloom_filter/trunk/boost/bloom_filter/detail/counting_apply_hash.hpp | 83 ++++++++++++++++++++++++++++++++++-----
2 files changed, 133 insertions(+), 18 deletions(-)
Modified: sandbox/bloom_filter/trunk/boost/bloom_filter/counting_bloom.hpp
==============================================================================
--- sandbox/bloom_filter/trunk/boost/bloom_filter/counting_bloom.hpp (original)
+++ sandbox/bloom_filter/trunk/boost/bloom_filter/counting_bloom.hpp 2011-07-06 13:09:20 EDT (Wed, 06 Jul 2011)
@@ -18,12 +18,15 @@
* of hash function application.
*/
#include <cmath>
-#include <boost/array.hpp>
+#include <iostream>
+#include <boost/array.hpp>
#include <boost/config.hpp>
#include <boost/mpl/vector.hpp>
#include <boost/mpl/size.hpp>
#include <boost/static_assert.hpp>
+#include <boost/type_traits/is_integral.hpp>
+#include <boost/type_traits/is_unsigned.hpp>
#include <boost/bloom_filter/detail/counting_apply_hash.hpp>
#include <boost/bloom_filter/hash/default.hpp>
@@ -41,16 +44,32 @@
typename Block = size_t>
class counting_bloom_filter {
+ // Block needs to be an integral type
+ BOOST_STATIC_ASSERT( boost::is_integral<Block>::value == true);
+
+ // Block needs to be an unsigned type
+ BOOST_STATIC_ASSERT( boost::is_unsigned<Block>::value == true);
+
// it doesn't make sense to ever support using a BitsPerBin value larger
// than the number of bits per Block. In that case, the user shouldn't
// be using a Bloom filter to represent their data.
BOOST_STATIC_ASSERT( (BitsPerBin <= (sizeof(Block) * 8) ) );
+ // because of the nature of this implementation, the Bloom filter
+ // can have internal fragmentation if the calculation for
+ // bins_per_slot has a remainder. The severity of the internal
+ // fragmentation is equal to the remainder * the number of slots.
+ BOOST_STATIC_ASSERT( ((sizeof(Block) * 8) % BitsPerBin) == 0);
+
// a slot is one element position in the array
- // a bin is a segment of a slot
+ // a bin is a segment of a slot
static const size_t slot_bits = sizeof(Block) * 8;
static const size_t bins_per_slot = slot_bits / BitsPerBin;
- static const size_t array_size = bins_per_slot * NumBins + 1;
+ static const size_t bin_bits = NumBins * BitsPerBin;
+ static const size_t array_size = bin_bits / slot_bits + 1;
+ static const size_t effective_num_bins = array_size * bins_per_slot;
+ static const size_t mask =
+ static_cast<Block>(0 - 1) >> (slot_bits - BitsPerBin);
private:
typedef boost::array<Block, array_size> bucket_type;
@@ -64,17 +83,24 @@
typedef Block block_type;
public:
- counting_bloom_filter() {}
+ counting_bloom_filter()
+ {
+ this->clear();
+ }
template <typename InputIterator>
counting_bloom_filter(const InputIterator start,
const InputIterator end) {
+ this->clear();
+
for (InputIterator i = start; i != end; ++i)
this->insert(*i);
}
#ifndef BOOST_NO_0X_HDR_INITIALIZER_LIST
counting_bloom_filter(const std::initializer_list<T>& ilist) {
+ this->clear();
+
typedef typename std::initializer_list<T>::const_iterator citer;
for (citer i = ilist.begin(), end = ilist.end(); i != end; ++i) {
this->insert(*i);
@@ -98,9 +124,39 @@
2.718281828459045235360287471352662497757247093699959574966;
return std::pow(1 - std::pow(e, -k * n / m), k);
};
-
+
+ // returns the number of bins that have at least 1 bit set
size_t count() const {
- return 0;
+ size_t ret = 0;
+
+ /*
+ std::cout << "Num Bins: " << NumBins << "\n"
+ << "Bins Per Slot: " << bins_per_slot << "\n"
+ << "Bits Per Bin: " << BitsPerBin << "\n"
+ << "Array Size: " << array_size << "\n";
+ */
+
+ for (bucket_const_iterator i = this->bits.begin(),
+ end = this->bits.end();
+ i != end; ++i) {
+ for (size_t bin = 0; bin < bins_per_slot; ++bin) {
+ size_t offset_bits = bin * BitsPerBin;
+ size_t target_bits = (*i >> offset_bits) & mask;
+
+ /*
+ std::cout << "(count) targeting pos: " << std::distance(i, end) - 1
+ << " with offset: " << offset_bits
+ << " and bin: " << bin
+ << " with slot value: " << *i
+ << " and bin value: " << target_bits << "\n";
+ */
+
+ if (target_bits > 0)
+ ++ret;
+ }
+ }
+
+ return ret;
};
bool empty() const {
Modified: sandbox/bloom_filter/trunk/boost/bloom_filter/detail/counting_apply_hash.hpp
==============================================================================
--- sandbox/bloom_filter/trunk/boost/bloom_filter/detail/counting_apply_hash.hpp (original)
+++ sandbox/bloom_filter/trunk/boost/bloom_filter/detail/counting_apply_hash.hpp 2011-07-06 13:09:20 EDT (Wed, 06 Jul 2011)
@@ -13,11 +13,11 @@
#ifndef BOOST_BLOOM_FILTER_COUNTING_APPLY_HASH_HPP
#define BOOST_BLOOM_FILTER_COUNTING_APPLY_HASH_HPP
+#include <iostream>
+
#include <boost/array.hpp>
#include <boost/mpl/at.hpp>
-#include <limits>
-
namespace boost {
namespace bloom_filter {
namespace detail {
@@ -36,9 +36,10 @@
struct counting_apply_hash
{
static const size_t MASK =
- std::numeric_limits<Block>::max() >> (sizeof(Block) * 8 - BitsPerBin);
+ static_cast<Block>(0 - 1) >> (sizeof(Block) * 8 - BitsPerBin);
- static void insert(const T& t, boost::array<Block, ArraySize>& _slots) {
+ static void insert(const T& t, boost::array<Block, ArraySize>& _slots)
+ {
typedef typename boost::mpl::at_c<HashFunctions, N>::type Hash;
static Hash hasher;
@@ -54,7 +55,8 @@
Block, ArraySize, BinsPerSlot>::insert(t, _slots);
}
- static void remove(const T& t, boost::array<Block, ArraySize>& _slots) {
+ static void remove(const T& t, boost::array<Block, ArraySize>& _slots)
+ {
typedef typename boost::mpl::at_c<HashFunctions, N>::type Hash;
static Hash hasher;
@@ -70,7 +72,8 @@
Block, ArraySize, BinsPerSlot>::remove(t, _slots);
}
- static bool contains(const T& t, const boost::array<Block, ArraySize>& _slots) {
+ static bool contains(const T& t, const boost::array<Block, ArraySize>& _slots)
+ {
typedef typename boost::mpl::at_c<HashFunctions, N>::type Hash;
static Hash hasher;
@@ -78,6 +81,7 @@
size_t pos = hash_val / BinsPerSlot;
size_t offset_bits = (hash_val % BinsPerSlot) * BitsPerBin;
size_t target_bits = (_slots[pos] >> offset_bits) & MASK;
+
return ((target_bits > 0) &&
counting_apply_hash<N-1, T, NumBins,
BitsPerBin, HashFunctions,
@@ -96,41 +100,96 @@
Block, ArraySize, BinsPerSlot>
{
static const size_t MASK =
- std::numeric_limits<Block>::max() >> (sizeof(Block) * 8 - BitsPerBin);
+ static_cast<Block>(0 - 1) >> (sizeof(Block) * 8 - BitsPerBin);
- static void insert(const T& t, boost::array<Block, ArraySize>& _slots) {
+ static void insert(const T& t, boost::array<Block, ArraySize>& _slots)
+ {
typedef typename boost::mpl::at_c<HashFunctions, 0>::type Hash;
static Hash hasher;
-
+
+ /*
+ std::cout << "(meta) NumBins: " << NumBins
+ << " BitsPerBin: " << BitsPerBin
+ << " ArraySize: " << ArraySize
+ << " BinsPerSlot: " << BinsPerSlot
+ << " incoming value: " << t << "\n";
+ */
+
size_t hash_val = hasher(t) % NumBins;
size_t pos = hash_val / BinsPerSlot;
size_t offset_bits = (hash_val % BinsPerSlot) * BitsPerBin;
size_t target_bits = (_slots[pos] >> offset_bits) & MASK;
++target_bits;
+ _slots[pos] &= ~(MASK << offset_bits);
_slots[pos] |= (target_bits << offset_bits);
+
+ /*
+ std::cout << "(insert) updated bits at pos " << pos
+ << " at bit offset " << offset_bits
+ << " using mask " << MASK
+ << " hashed as " << hash_val
+ << " from " << target_bits - 1
+ << " to " << target_bits << "\n";
+ */
}
- static void remove(const T& t, boost::array<Block, ArraySize>& _slots) {
+ static void remove(const T& t, boost::array<Block, ArraySize>& _slots)
+ {
typedef typename boost::mpl::at_c<HashFunctions, 0>::type Hash;
static Hash hasher;
+
+ /*
+ std::cout << "(meta) NumBins: " << NumBins
+ << " BitsPerBin: " << BitsPerBin
+ << " ArraySize: " << ArraySize
+ << " BinsPerSlot: " << BinsPerSlot
+ << " incoming value: " << t << "\n";
+ */
size_t hash_val = hasher(t) % NumBins;
size_t pos = hash_val / BinsPerSlot;
size_t offset_bits = (hash_val % BinsPerSlot) * BitsPerBin;
size_t target_bits = (_slots[pos] >> offset_bits) & MASK;
- --target_bits;
+ --target_bits;
+ _slots[pos] &= ~(MASK << offset_bits);
_slots[pos] |= (target_bits << offset_bits);
+
+ /*
+ std::cout << "(remove) updated bits at pos " << pos
+ << " at bit offset " << offset_bits
+ << " using mask " << MASK
+ << " hashed as " << hash_val
+ << " from " << target_bits + 1
+ << " to " << target_bits << "\n";
+ */
}
- static bool contains(const T& t, const boost::array<Block, ArraySize>& _slots) {
+ static bool contains(const T& t, const boost::array<Block, ArraySize>& _slots)
+ {
typedef typename boost::mpl::at_c<HashFunctions, 0>::type Hash;
static Hash hasher;
+ /*
+ std::cout << "(meta) NumBins: " << NumBins
+ << " BitsPerBin: " << BitsPerBin
+ << " ArraySize: " << ArraySize
+ << " BinsPerSlot: " << BinsPerSlot
+ << " incoming value: " << t << "\n";
+ */
+
size_t hash_val = hasher(t) % NumBins;
size_t pos = hash_val / BinsPerSlot;
size_t offset_bits = (hash_val % BinsPerSlot) * BitsPerBin;
size_t target_bits = (_slots[pos] >> offset_bits) & MASK;
+ /*
+ std::cout << "(contains) checked bits at pos " << pos
+ << " at bit offset " << offset_bits
+ << " using mask " << MASK
+ << " hashed as " << hash_val
+ << " as " << target_bits << "\n";
+ */
+
return (target_bits > 0);
}
};
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk