Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r72333 - in sandbox/bloom_filter/trunk: boost/bloom_filter libs/bloom_filter/test
From: cpp.cabrera_at_[hidden]
Date: 2011-06-01 12:32:53


Author: alejandro
Date: 2011-06-01 12:32:52 EDT (Wed, 01 Jun 2011)
New Revision: 72333
URL: http://svn.boost.org/trac/boost/changeset/72333

Log:
Made test suite more robust. Removed deprecated tests (operator[], benchmark). Fixed test case erroneously reporting intersection operators as broken. Added false_positive_rate, num_hash_functions, and count functions to bloom filter interface.
Text files modified:
   sandbox/bloom_filter/trunk/boost/bloom_filter/bloom.hpp | 18 ++++++++
   sandbox/bloom_filter/trunk/libs/bloom_filter/test/boost_test.cpp | 83 ++++++++++++++++++++++++++++++++-------
   2 files changed, 85 insertions(+), 16 deletions(-)

Modified: sandbox/bloom_filter/trunk/boost/bloom_filter/bloom.hpp
==============================================================================
--- sandbox/bloom_filter/trunk/boost/bloom_filter/bloom.hpp (original)
+++ sandbox/bloom_filter/trunk/boost/bloom_filter/bloom.hpp 2011-06-01 12:32:52 EDT (Wed, 01 Jun 2011)
@@ -16,6 +16,7 @@
  * \brief A generic Bloom filter providing compile-time unrolling
  * of hash function application.
  */
+#include <cmath>
 #include <bitset>
 
 #include <boost/config.hpp>
@@ -38,6 +39,23 @@
     BOOST_CONSTEXPR size_t size() const {
       return Size;
     }
+
+ BOOST_CONSTEXPR size_t num_hash_functions() const {
+ return mpl::size<HashFunctions>::value;
+ };
+
+ double false_positive_rate() const {
+ const double n = static_cast<double>(this->bits.count());
+ static const double k = static_cast<double>(num_hash_functions());
+ static const double m = static_cast<double>(Size);
+ static const double e =
+ 2.718281828459045235360287471352662497757247093699959574966;
+ return std::pow(1 - std::pow(e, -k * n / m), k);
+ };
+
+ size_t count() const {
+ return this->bits.count();
+ };
     
     void insert(const T& t) {
       static const unsigned N = mpl::size<HashFunctions>::value - 1;

Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/test/boost_test.cpp
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/test/boost_test.cpp (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/test/boost_test.cpp 2011-06-01 12:32:52 EDT (Wed, 01 Jun 2011)
@@ -15,6 +15,7 @@
 
 #include <boost/bloom_filter/bloom.hpp>
 #include <boost/test/unit_test.hpp>
+#include <boost/test/floating_point_comparison.hpp>
 
 using boost::bloom_filter;
 using boost::BoostHash;
@@ -71,18 +72,60 @@
   BOOST_CHECK_EQUAL(bloom_2048.size(), 2048ul);
 }
 
-BOOST_AUTO_TEST_CASE(contains) {
- bloom_filter<size_t, 8> bloom;
+BOOST_AUTO_TEST_CASE(numHashFunctions) {
+ bloom_filter<size_t, 8> bloom_3;
+ bloom_filter<size_t, 8, boost::mpl::vector<
+ BoostHash<size_t, 1>,
+ BoostHash<size_t, 2>>> bloom_2;
+ bloom_filter<size_t, 8, boost::mpl::vector<
+ BoostHash<size_t, 1>,
+ BoostHash<size_t, 2>,
+ BoostHash<size_t, 3>,
+ BoostHash<size_t, 4>,
+ BoostHash<size_t, 5>,
+ BoostHash<size_t, 6>,
+ BoostHash<size_t, 7>>> bloom_7;
+
+ BOOST_CHECK_EQUAL(bloom_3.num_hash_functions(), 3);
+ BOOST_CHECK_EQUAL(bloom_2.num_hash_functions(), 2);
+ BOOST_CHECK_EQUAL(bloom_7.num_hash_functions(), 7);
+}
 
+BOOST_AUTO_TEST_CASE(falsePositiveRate) {
+ bloom_filter<size_t, 64> bloom;
+
+ BOOST_CHECK_EQUAL(bloom.false_positive_rate(), 0.0);
+
   bloom.insert(1);
- BOOST_CHECK_EQUAL(bloom.contains(1), true);
+ BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.002257625907, 0.0001);
+
+ bloom.insert(2);
+ BOOST_CHECK_LT(bloom.false_positive_rate(), 0.014736);
+
+ bloom.insert(3);
+ BOOST_CHECK_LT(bloom.false_positive_rate(), 0.040773);
+
+ bloom.insert(4);
+ BOOST_CHECK_LT(bloom.false_positive_rate(), 0.0796276);
+
+ bloom.insert(5);
+ BOOST_CHECK_LT(bloom.false_positive_rate(), 0.12877);
+
+ bloom.insert(6);
+ BOOST_CHECK_LT(bloom.false_positive_rate(), 0.185102);
+
+ for (size_t i = 7; i < 5000; ++i)
+ bloom.insert(i);
+ BOOST_CHECK_LE(bloom.false_positive_rate(), 1.0);
 }
 
-BOOST_AUTO_TEST_CASE(containsOperator) {
+BOOST_AUTO_TEST_CASE(contains) {
   bloom_filter<size_t, 8> bloom;
 
   bloom.insert(1);
   BOOST_CHECK_EQUAL(bloom.contains(1), true);
+ BOOST_CHECK_LE(bloom.count(), 3);
+ BOOST_CHECK_GE(bloom.count(), 1);
 }
 
 BOOST_AUTO_TEST_CASE(doesNotContain) {
@@ -108,6 +151,7 @@
 
   bloom.clear();
   BOOST_CHECK_EQUAL(bloom.contains(1), false);
+ BOOST_CHECK_EQUAL(bloom.count(), 0);
 }
 
 BOOST_AUTO_TEST_CASE(testUnion) {
@@ -125,27 +169,31 @@
 
   for (size_t i = 0; i < 200; ++i)
     BOOST_CHECK_EQUAL(bloom_union.contains(i), true);
+ BOOST_CHECK_GE(bloom_union.count(), bloom_1.count());
+ BOOST_CHECK_GE(bloom_union.count(), bloom_2.count());
 }
 
 BOOST_AUTO_TEST_CASE(testUnionAssign) {
   bloom_filter<size_t, 32> bloom_1;
- bloom_filter<size_t, 32> bloom_2;
+ bloom_filter<size_t, 32> bloom_union;
 
   for (size_t i = 0; i < 100; ++i)
     bloom_1.insert(i);
   
- bloom_2 |= bloom_1;
+ bloom_union |= bloom_1;
 
   for (size_t i = 0; i < 100; ++i)
- BOOST_CHECK_EQUAL(bloom_2.contains(i), true);
+ BOOST_CHECK_EQUAL(bloom_union.contains(i), true);
+ BOOST_CHECK_EQUAL(bloom_union.count(), bloom_1.count());
 }
 
 BOOST_AUTO_TEST_CASE(testIntersect) {
- bloom_filter<size_t, 20000> bloom_1;
- bloom_filter<size_t, 20000> bloom_2;
- bloom_filter<size_t, 20000> bloom_intersect;
+ bloom_filter<size_t, 32> bloom_1;
+ bloom_filter<size_t, 32> bloom_2;
+ bloom_filter<size_t, 32> bloom_intersect;
 
- for (size_t i = 0; i < 100; ++i)
+ // overlap at 100
+ for (size_t i = 0; i < 101; ++i)
     bloom_1.insert(i);
   
   for (size_t i = 100; i < 200; ++i)
@@ -153,23 +201,25 @@
 
   bloom_intersect = bloom_1 & bloom_2;
 
- for (size_t i = 0; i < 200; ++i)
- BOOST_CHECK_EQUAL(bloom_intersect.contains(i), false);
+ BOOST_CHECK_LE(bloom_intersect.count(), bloom_1.count());
+ BOOST_CHECK_LE(bloom_intersect.count(), bloom_2.count());
+ BOOST_CHECK_EQUAL(bloom_intersect.contains(100), true);
 }
 
 BOOST_AUTO_TEST_CASE(testIntersectAssign) {
   bloom_filter<size_t, 32> bloom_1;
- bloom_filter<size_t, 32> bloom_2;
+ bloom_filter<size_t, 32> bloom_intersect;
 
   for (size_t i = 0; i < 100; ++i)
     bloom_1.insert(i);
   
- bloom_2 &= bloom_1;
+ bloom_intersect &= bloom_1;
 
   for (size_t i = 0; i < 100; ++i)
- BOOST_CHECK_EQUAL(bloom_2.contains(i), false);
+ BOOST_CHECK_EQUAL(bloom_intersect.contains(i), false);
 }
 
+/*
 BOOST_AUTO_TEST_CASE(collisionBenchmark) {
   typedef boost::mpl::vector<
     OHash <size_t, 2>,
@@ -196,3 +246,4 @@
   std::cout << collisions << " collisions" << std::endl;
   bloom.clear();
 }
+*/


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk