|
Boost-Commit : |
Subject: [Boost-commit] svn:boost r72333 - in sandbox/bloom_filter/trunk: boost/bloom_filter libs/bloom_filter/test
From: cpp.cabrera_at_[hidden]
Date: 2011-06-01 12:32:53
Author: alejandro
Date: 2011-06-01 12:32:52 EDT (Wed, 01 Jun 2011)
New Revision: 72333
URL: http://svn.boost.org/trac/boost/changeset/72333
Log:
Made test suite more robust. Removed deprecated tests (operator[], benchmark). Fixed test case erroneously reporting intersection operators as broken. Added false_positive_rate, num_hash_functions, and count functions to bloom filter interface.
Text files modified:
sandbox/bloom_filter/trunk/boost/bloom_filter/bloom.hpp | 18 ++++++++
sandbox/bloom_filter/trunk/libs/bloom_filter/test/boost_test.cpp | 83 ++++++++++++++++++++++++++++++++-------
2 files changed, 85 insertions(+), 16 deletions(-)
Modified: sandbox/bloom_filter/trunk/boost/bloom_filter/bloom.hpp
==============================================================================
--- sandbox/bloom_filter/trunk/boost/bloom_filter/bloom.hpp (original)
+++ sandbox/bloom_filter/trunk/boost/bloom_filter/bloom.hpp 2011-06-01 12:32:52 EDT (Wed, 01 Jun 2011)
@@ -16,6 +16,7 @@
* \brief A generic Bloom filter providing compile-time unrolling
* of hash function application.
*/
+#include <cmath>
#include <bitset>
#include <boost/config.hpp>
@@ -38,6 +39,23 @@
BOOST_CONSTEXPR size_t size() const {
return Size;
}
+
+ BOOST_CONSTEXPR size_t num_hash_functions() const {
+ return mpl::size<HashFunctions>::value;
+ };
+
+ double false_positive_rate() const {
+ const double n = static_cast<double>(this->bits.count());
+ static const double k = static_cast<double>(num_hash_functions());
+ static const double m = static_cast<double>(Size);
+ static const double e =
+ 2.718281828459045235360287471352662497757247093699959574966;
+ return std::pow(1 - std::pow(e, -k * n / m), k);
+ };
+
+ size_t count() const {
+ return this->bits.count();
+ };
void insert(const T& t) {
static const unsigned N = mpl::size<HashFunctions>::value - 1;
Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/test/boost_test.cpp
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/test/boost_test.cpp (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/test/boost_test.cpp 2011-06-01 12:32:52 EDT (Wed, 01 Jun 2011)
@@ -15,6 +15,7 @@
#include <boost/bloom_filter/bloom.hpp>
#include <boost/test/unit_test.hpp>
+#include <boost/test/floating_point_comparison.hpp>
using boost::bloom_filter;
using boost::BoostHash;
@@ -71,18 +72,60 @@
BOOST_CHECK_EQUAL(bloom_2048.size(), 2048ul);
}
-BOOST_AUTO_TEST_CASE(contains) {
- bloom_filter<size_t, 8> bloom;
+BOOST_AUTO_TEST_CASE(numHashFunctions) {
+ bloom_filter<size_t, 8> bloom_3;
+ bloom_filter<size_t, 8, boost::mpl::vector<
+ BoostHash<size_t, 1>,
+ BoostHash<size_t, 2>>> bloom_2;
+ bloom_filter<size_t, 8, boost::mpl::vector<
+ BoostHash<size_t, 1>,
+ BoostHash<size_t, 2>,
+ BoostHash<size_t, 3>,
+ BoostHash<size_t, 4>,
+ BoostHash<size_t, 5>,
+ BoostHash<size_t, 6>,
+ BoostHash<size_t, 7>>> bloom_7;
+
+ BOOST_CHECK_EQUAL(bloom_3.num_hash_functions(), 3);
+ BOOST_CHECK_EQUAL(bloom_2.num_hash_functions(), 2);
+ BOOST_CHECK_EQUAL(bloom_7.num_hash_functions(), 7);
+}
+BOOST_AUTO_TEST_CASE(falsePositiveRate) {
+ bloom_filter<size_t, 64> bloom;
+
+ BOOST_CHECK_EQUAL(bloom.false_positive_rate(), 0.0);
+
bloom.insert(1);
- BOOST_CHECK_EQUAL(bloom.contains(1), true);
+ BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.002257625907, 0.0001);
+
+ bloom.insert(2);
+ BOOST_CHECK_LT(bloom.false_positive_rate(), 0.014736);
+
+ bloom.insert(3);
+ BOOST_CHECK_LT(bloom.false_positive_rate(), 0.040773);
+
+ bloom.insert(4);
+ BOOST_CHECK_LT(bloom.false_positive_rate(), 0.0796276);
+
+ bloom.insert(5);
+ BOOST_CHECK_LT(bloom.false_positive_rate(), 0.12877);
+
+ bloom.insert(6);
+ BOOST_CHECK_LT(bloom.false_positive_rate(), 0.185102);
+
+ for (size_t i = 7; i < 5000; ++i)
+ bloom.insert(i);
+ BOOST_CHECK_LE(bloom.false_positive_rate(), 1.0);
}
-BOOST_AUTO_TEST_CASE(containsOperator) {
+BOOST_AUTO_TEST_CASE(contains) {
bloom_filter<size_t, 8> bloom;
bloom.insert(1);
BOOST_CHECK_EQUAL(bloom.contains(1), true);
+ BOOST_CHECK_LE(bloom.count(), 3);
+ BOOST_CHECK_GE(bloom.count(), 1);
}
BOOST_AUTO_TEST_CASE(doesNotContain) {
@@ -108,6 +151,7 @@
bloom.clear();
BOOST_CHECK_EQUAL(bloom.contains(1), false);
+ BOOST_CHECK_EQUAL(bloom.count(), 0);
}
BOOST_AUTO_TEST_CASE(testUnion) {
@@ -125,27 +169,31 @@
for (size_t i = 0; i < 200; ++i)
BOOST_CHECK_EQUAL(bloom_union.contains(i), true);
+ BOOST_CHECK_GE(bloom_union.count(), bloom_1.count());
+ BOOST_CHECK_GE(bloom_union.count(), bloom_2.count());
}
BOOST_AUTO_TEST_CASE(testUnionAssign) {
bloom_filter<size_t, 32> bloom_1;
- bloom_filter<size_t, 32> bloom_2;
+ bloom_filter<size_t, 32> bloom_union;
for (size_t i = 0; i < 100; ++i)
bloom_1.insert(i);
- bloom_2 |= bloom_1;
+ bloom_union |= bloom_1;
for (size_t i = 0; i < 100; ++i)
- BOOST_CHECK_EQUAL(bloom_2.contains(i), true);
+ BOOST_CHECK_EQUAL(bloom_union.contains(i), true);
+ BOOST_CHECK_EQUAL(bloom_union.count(), bloom_1.count());
}
BOOST_AUTO_TEST_CASE(testIntersect) {
- bloom_filter<size_t, 20000> bloom_1;
- bloom_filter<size_t, 20000> bloom_2;
- bloom_filter<size_t, 20000> bloom_intersect;
+ bloom_filter<size_t, 32> bloom_1;
+ bloom_filter<size_t, 32> bloom_2;
+ bloom_filter<size_t, 32> bloom_intersect;
- for (size_t i = 0; i < 100; ++i)
+ // overlap at 100
+ for (size_t i = 0; i < 101; ++i)
bloom_1.insert(i);
for (size_t i = 100; i < 200; ++i)
@@ -153,23 +201,25 @@
bloom_intersect = bloom_1 & bloom_2;
- for (size_t i = 0; i < 200; ++i)
- BOOST_CHECK_EQUAL(bloom_intersect.contains(i), false);
+ BOOST_CHECK_LE(bloom_intersect.count(), bloom_1.count());
+ BOOST_CHECK_LE(bloom_intersect.count(), bloom_2.count());
+ BOOST_CHECK_EQUAL(bloom_intersect.contains(100), true);
}
BOOST_AUTO_TEST_CASE(testIntersectAssign) {
bloom_filter<size_t, 32> bloom_1;
- bloom_filter<size_t, 32> bloom_2;
+ bloom_filter<size_t, 32> bloom_intersect;
for (size_t i = 0; i < 100; ++i)
bloom_1.insert(i);
- bloom_2 &= bloom_1;
+ bloom_intersect &= bloom_1;
for (size_t i = 0; i < 100; ++i)
- BOOST_CHECK_EQUAL(bloom_2.contains(i), false);
+ BOOST_CHECK_EQUAL(bloom_intersect.contains(i), false);
}
+/*
BOOST_AUTO_TEST_CASE(collisionBenchmark) {
typedef boost::mpl::vector<
OHash <size_t, 2>,
@@ -196,3 +246,4 @@
std::cout << collisions << " collisions" << std::endl;
bloom.clear();
}
+*/
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk