Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r73561 - sandbox/bloom_filter/trunk/boost/bloom_filter
From: cpp.cabrera_at_[hidden]
Date: 2011-08-05 23:07:12


Author: alejandro
Date: 2011-08-05 23:07:11 EDT (Fri, 05 Aug 2011)
New Revision: 73561
URL: http://svn.boost.org/trac/boost/changeset/73561

Log:
Added intitial dynamic_counting_bloom_filter implementation.
Added:
   sandbox/bloom_filter/trunk/boost/bloom_filter/dynamic_counting_bloom_filter.hpp (contents, props changed)

Added: sandbox/bloom_filter/trunk/boost/bloom_filter/dynamic_counting_bloom_filter.hpp
==============================================================================
--- (empty file)
+++ sandbox/bloom_filter/trunk/boost/bloom_filter/dynamic_counting_bloom_filter.hpp 2011-08-05 23:07:11 EDT (Fri, 05 Aug 2011)
@@ -0,0 +1,381 @@
+//////////////////////////////////////////////////////////////////////////////
+//
+// (C) Copyright Alejandro Cabrera 2011.
+// Distributed under the Boost
+// Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or
+// copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// See http://www.boost.org/libs/bloom_filter for documentation.
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#ifndef BOOST_BLOOM_FILTER_DYNAMIC_COUNTING_BLOOM_FILTER_HPP
+#define BOOST_BLOOM_FILTER_DYNAMIC_COUNTING_BLOOM_FILTER_HPP 1
+
+#include <cmath>
+#include <vector>
+
+#include <boost/config.hpp>
+
+#include <boost/mpl/vector.hpp>
+#include <boost/mpl/size.hpp>
+
+#include <boost/static_assert.hpp>
+#include <boost/type_traits/is_integral.hpp>
+#include <boost/type_traits/is_unsigned.hpp>
+
+#include <boost/bloom_filter/detail/counting_apply_hash.hpp>
+#include <boost/bloom_filter/hash/default.hpp>
+
+namespace boost {
+ namespace bloom_filters {
+ template <typename T,
+ size_t BitsPerBin = 4,
+ class HashFunctions = mpl::vector<boost_hash<T, 0> >,
+ typename Block = size_t,
+ typename Allocator = std::allocator<Block> >
+ class dynamic_counting_bloom_filter {
+
+ // Block needs to be an integral type
+ BOOST_STATIC_ASSERT( boost::is_integral<Block>::value == true);
+
+ // Block needs to be an unsigned type
+ BOOST_STATIC_ASSERT( boost::is_unsigned<Block>::value == true);
+
+ // BitsPerBin needs to be greater than 0
+ BOOST_STATIC_ASSERT( BitsPerBin > 0);
+
+ // it doesn't make sense to ever support using a BitsPerBin value larger
+ // than the number of bits per Block. In that case, the user shouldn't
+ // be using a Bloom filter to represent their data.
+ BOOST_STATIC_ASSERT( (BitsPerBin < (sizeof(Block) * 8) ) );
+
+ // because of the nature of this implementation, the Bloom filter
+ // can have internal fragmentation if the calculation for
+ // bins_per_slot has a remainder. The severity of the internal
+ // fragmentation is equal to the remainder * the number of slots.
+ // This check prevents internal fragmentation.
+ // This also necessarily limits to bin sizes to one of:
+ // [1,2,4,8,16,32(64-bit system only)] bits
+ BOOST_STATIC_ASSERT( ((sizeof(Block) * 8) % BitsPerBin) == 0);
+
+ public:
+ typedef T value_type;
+ typedef T key_type;
+ typedef HashFunctions hash_function_type;
+ typedef Block block_type;
+ typedef Allocator allocator_type;
+ typedef dynamic_counting_bloom_filter<T, BitsPerBin,
+ HashFunctions,
+ Block, Allocator> this_type;
+
+ typedef std::vector<Block, Allocator> bucket_type;
+ typedef typename bucket_type::iterator bucket_iterator;
+ typedef typename bucket_type::const_iterator bucket_const_iterator;
+
+ static const slot_bits = sizeof(block_type) * 8;
+ static const size_t default_num_bins = 10000;
+
+ private:
+ size_t bucket_size(const size_t num_bins) const {
+ const size_t bin_bits = num_bins * this->bits_per_bin();
+ return bin_bits / slot_bits + 1;
+ }
+
+ public:
+ //! constructors
+ dynamic_counting_bloom_filter()
+ : bits(bucket_size(default_num_bins))
+ {
+ this->clear();
+ }
+
+ explicit dynamic_counting_bloom_filter(const size_t num_bins)
+ : bits(bucket_size(num_bins))
+ {
+ this->clear();
+ }
+
+ template <typename InputIterator>
+ dynamic_counting_bloom_filter(const InputIterator start,
+ const InputIterator end)
+ : bits(bucket_size(std::distance(start, end) * 4))
+ {
+ this->clear();
+
+ for (InputIterator i = start; i != end; ++i)
+ this->insert(*i);
+ }
+
+ //! meta functions
+ size_t num_bins() const
+ {
+ return NumBins;
+ }
+
+ static BOOST_CONSTEXPR size_t bits_per_bin()
+ {
+ return BitsPerBin;
+ }
+
+ static BOOST_CONSTEXPR size_t bins_per_slot()
+ {
+ return sizeof(block_type) * 8 / BitsPerBin;
+ }
+
+ static BOOST_CONSTEXPR size_t mask()
+ {
+ return static_cast<Block>(0 - 1) >> (sizeof(Block) * 8 - BitsPerBin);
+ }
+
+ size_t bit_capacity() const
+ {
+ return this->num_bins() * BitsPerBin;
+ }
+
+ static BOOST_CONSTEXPR size_t num_hash_functions()
+ {
+ return mpl::size<HashFunctions>::value;
+ }
+
+ double false_positive_rate() const
+ {
+ const double n = static_cast<double>(this->count());
+ static const double k = static_cast<double>(num_hash_functions());
+ static const double m = static_cast<double>(NumBins);
+ static const double e =
+ 2.718281828459045235360287471352662497757247093699959574966;
+ return std::pow(1 - std::pow(e, -k * n / m), k);
+ }
+
+ //? returns the number of bins that have at least 1 bit set
+ size_t count() const
+ {
+ size_t ret = 0;
+
+ for (bucket_const_iterator i = this->bits.begin(),
+ end = this->bits.end();
+ i != end; ++i) {
+ for (size_t bin = 0; bin < this->bins_per_slot(); ++bin) {
+ const size_t offset_bits = bin * BitsPerBin;
+ const size_t target_bits = (*i >> offset_bits) & this->mask();
+
+ if (target_bits > 0)
+ ++ret;
+ }
+ }
+
+ return ret;
+ }
+
+ bool empty() const
+ {
+ return this->count() == 0;
+ }
+
+ //! core ops
+ void insert(const T& t)
+ {
+ static const unsigned N = boost::mpl::size<hash_function_type>::value - 1;
+ detail::counting_apply_hash<N, this_type>::insert(t, this->bits);
+ }
+
+ template <typename InputIterator>
+ void insert(const InputIterator start, const InputIterator end)
+ {
+ for (InputIterator i = start; i != end; ++i) {
+ this->insert(*i);
+ }
+ }
+
+ void remove(const T& t)
+ {
+ static const unsigned N = boost::mpl::size<hash_function_type>::value - 1;
+ detail::counting_apply_hash<N, this_type>::remove(t, this->bits);
+ }
+
+ template <typename InputIterator>
+ void remove(const InputIterator start, const InputIterator end)
+ {
+ for (InputIterator i = start; i != end; ++i) {
+ this->remove(*i);
+ }
+ }
+
+ bool probably_contains(const T& t) const
+ {
+ static const unsigned N = mpl::size<HashFunctions>::value - 1;
+ return detail::counting_apply_hash<N, this_type>::contains(t,
+ this->bits);
+ }
+
+ //! auxiliary ops
+ void clear()
+ {
+ this->bits.clear();
+ }
+
+ void swap(counting_bloom_filter& other)
+ {
+ counting_bloom_filter tmp = other;
+ other = *this;
+ *this = tmp;
+ }
+
+ //! pairwise ops
+ counting_bloom_filter&
+ experimental_union_assign(const counting_bloom_filter& rhs)
+ {
+ if (this->bit_capacity() != rhs.bit_capacity())
+ throw detail::incompatible_size_exception();
+
+ bucket_iterator this_end = this->bits.end();
+ bucket_iterator this_start = this->bits.begin();
+ bucket_const_iterator rhs_start = rhs.bits.begin();
+
+ for (; this_start != this_end; ++this_start, ++rhs_start) {
+ *this_start |= *rhs_start;
+ }
+
+ return *this;
+ }
+
+ counting_bloom_filter&
+ experimental_intersect_assign(const counting_bloom_filter& rhs)
+ {
+ if (this->bit_capacity() != rhs.bit_capacity())
+ throw detail::incompatible_size_exception();
+
+ bucket_iterator this_end = this->bits.end();
+ bucket_iterator this_start = this->bits.begin();
+ bucket_const_iterator rhs_start = rhs.bits.begin();
+
+ for (; this_start != this_end; ++this_start, ++rhs_start) {
+ *this_start &= *rhs_start;
+ }
+
+ return *this;
+ }
+
+ // equality comparison operators
+ template <typename _T, size_t _BitsPerBin,
+ typename _HashFns, typename _Block, typename _Allocator>
+ friend bool
+ operator==(const dynamic_counting_bloom_filter<_T, _BitsPerBin,
+ _HashFns, _Block,
+ _Allocator>& lhs,
+ const dynamic_counting_bloom_filter<_T, _BitsPerBin,
+ _HashFns, _Block,
+ _Allocator>& rhs);
+
+ template <typename _T, size_t _BitsPerBin,
+ typename _HashFns, typename _Block, typename _Allocator>
+ friend bool
+ operator!=(const dynamic_counting_bloom_filter<_T, _BitsPerBin,
+ _HashFns, _Block,
+ _Allocator>& lhs,
+ const dynamic_counting_bloom_filter<_T, _BitsPerBin,
+ _HashFns, _Block,
+ _Allocator>& rhs);
+
+
+ private:
+ bucket_type bits;
+ };
+
+ // union
+ template<class T, size_t BitsPerBin, class HashFunctions,
+ typename Block, typename Allocator>
+ dynamic_counting_bloom_filter<T, BitsPerBin,
+ HashFunctions, Block, Allocator>
+ experimental_union(const dynamic_counting_bloom_filter<T, BitsPerBin,
+ HashFunctions,
+ Block,
+ Allocator>& lhs,
+ const dynamic_counting_bloom_filter<T, BitsPerBin,
+ HashFunctions,
+ Block,
+ Allocator>& rhs)
+ {
+ dynamic_counting_bloom_filter<T, BitsPerBin,
+ HashFunctions, Block,
+ Allocator> ret(lhs);
+ ret.experimental_union_assign(rhs);
+ return ret;
+ }
+
+ // intersection
+ template<class T, size_t BitsPerBin, class HashFunctions,
+ typename Block, typename Allocator>
+ dynamic_counting_bloom_filter<T, BitsPerBin, HashFunctions,
+ Block, Allocator>
+ experimental_intersect(const dynamic_counting_bloom_filter<T, BitsPerBin,
+ HashFunctions,
+ Block,
+ Allocator>& lhs,
+ const dynamic_counting_bloom_filter<T, BitsPerBin,
+ HashFunctions,
+ Block,
+ Allocator>& rhs)
+ {
+ dynamic_counting_bloom_filter<T, BitsPerBin,
+ HashFunctions, Block,
+ Allocator> ret(lhs);
+ ret.experimental_intersect_assign(rhs);
+ return ret;
+ }
+
+ template<class T, size_t BitsPerBin, class HashFunctions,
+ typename Block, typename Allocator>
+ void
+ swap(dynamic_counting_bloom_filter<T, BitsPerBin,
+ HashFunctions, Block,
+ Allocator>& lhs,
+ dynamic_counting_bloom_filter<T, BitsPerBin,
+ HashFunctions, Block,
+ Allocator>& rhs)
+
+ {
+ lhs.swap(rhs);
+ }
+
+ template<class T, size_t BitsPerBin, class HashFunctions,
+ typename Block, typename Allocator>
+ bool
+ operator==(const dynamic_counting_bloom_filter<T, BitsPerBin,
+ HashFunctions,
+ Block,
+ Allocator>& lhs,
+ const dynamic_counting_bloom_filter<T, BitsPerBin,
+ HashFunctions,
+ Block,
+ Allocator>& rhs)
+ {
+ if (lhs.bit_capacity() != rhs.bit_capacity())
+ throw detail::incompatible_size_exception();
+
+ return (lhs.bits == rhs.bits);
+ }
+
+ template<class T, size_t BitsPerBin, class HashFunctions,
+ typename Block, typename Allocator>
+ bool
+ operator!=(const dynamic_counting_bloom_filter<T, BitsPerBin,
+ HashFunctions,
+ Block,
+ Allocator>& lhs,
+ const dynamic_counting_bloom_filter<T, BitsPerBin,
+ HashFunctions,
+ Block,
+ Allocator>& rhs)
+ {
+ if (lhs.bit_capacity() != rhs.bit_capacity())
+ throw detail::incompatible_size_exception();
+
+ return !(lhs == rhs);
+ }
+
+ } // namespace bloom_filter
+} // namespace boost
+#endif


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk