|
Boost-Commit : |
Subject: [Boost-commit] svn:boost r72720 - in sandbox/bloom_filter/trunk/libs/bloom_filter: doc/html doc/html/reference example test
From: cpp.cabrera_at_[hidden]
Date: 2011-06-22 18:17:50
Author: alejandro
Date: 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
New Revision: 72720
URL: http://svn.boost.org/trac/boost/changeset/72720
Log:
Added empty(). Changed contains() -> probably_contains(). Made relevant documentation, test suite, and example updates.
Text files modified:
sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/history.html | 11 +++++++
sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/index.html | 2
sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/reference/bloom.html | 39 ++++++++++++++++++++++----
sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/tutorial.html | 8 ++--
sandbox/bloom_filter/trunk/libs/bloom_filter/example/advanced_bloom.cpp | 2
sandbox/bloom_filter/trunk/libs/bloom_filter/example/basic_bloom.cpp | 2
sandbox/bloom_filter/trunk/libs/bloom_filter/example/custom_hash.cpp | 2
sandbox/bloom_filter/trunk/libs/bloom_filter/example/string_bloom.cpp | 2
sandbox/bloom_filter/trunk/libs/bloom_filter/example/url_bloom.cpp | 2
sandbox/bloom_filter/trunk/libs/bloom_filter/test/regression.cpp | 58 ++++++++++++++++++++++++---------------
10 files changed, 88 insertions(+), 40 deletions(-)
Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/history.html
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/history.html (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/history.html 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -36,6 +36,17 @@
</div>
<h1 class="title">Version history</h1>
+ <h3>v1.0.1</h3>
+ <ul>
+ <li><strong>API Changes</strong>
+ <ul>
+ <li>contains() -> probably_contains()</li>
+ <li>Added empty()</li>
+ </ul>
+ </li>
+ </ul>
+
+ </ul>
<h2>v1.0</h2>
<ul>
<li>Initial release.</li>
Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/index.html
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/index.html (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/index.html 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -72,7 +72,7 @@
<div>
<p>
- Last revised: <time datetime="2011-06-20">June 20, 2011</time>.
+ Last revised: <time datetime="2011-06-22">June 22, 2011</time>.
</p>
<p class="copyright">
Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/reference/bloom.html
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/reference/bloom.html (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/reference/bloom.html 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -54,10 +54,12 @@
<code class="c_func">bloom_filter</code>();
<code class="c_comment">// data structure metadata query functions</code>
- <code class="c_keyword">static constexpr</code> <code class="c_type">size_t</code> <code class="c_func">size</code>() <code class="c_keyword">const</code>;
+ <code class="c_keyword">static constexpr</code> <code class="c_type">size_t</code> <code class="c_func">bit_capacity</code>() <code class="c_keyword">const</code>;
<code class="c_keyword">static constexpr</code> <code class="c_type">size_t</code> <code class="c_func">num_hash_functions</code>() <code class="c_keyword">const</code>;
<code class="c_type">double</code> <code class="c_func">false_positive_rate</code>() <code class="c_keyword">const</code>;
<code class="c_type">size_t</code> <code class="c_func">count</code>() <code class="c_keyword">const</code>;
+ <code class="c_type">bool</code> <code class="c_func">empty</code>() <code class="c_keyword">const</code>;
+ <code class="c_type">void</code> <code class="c_func">clear</code>()</code>;
<code class="c_comment">// data structures core ops</code>
<code class="c_type">void</code> <code class="c_func">insert</code>(<code class="c_keyword">const</code> <code class="c_type">T</code>&);
@@ -100,12 +102,14 @@
<p>Function Reference</p>
<ul>
<li>bloom_filter()</li>
- <li>size()</li>
+ <li>bit_capacity()</li>
<li>num_hash_functions()</li>
<li>false_positive_rate()</li>
+ <li>empty()</li>
<li>count()</li>
+ <li>clear()</li>
<li>insert()</li>
- <li>contains()</li>
+ <li>probably_contains()</li>
<li>operator|=()</li>
<li>operator&=()</li>
<li>operator|()</li>
@@ -125,8 +129,8 @@
</div>
<div class="func_ref">
- <a name="size"></a>
- <h3><code class="c_type">size_t</code> <code class="c_func">size</code>() <code class="c_keyword">const</code></h3>
+ <a name="bit_capacity"></a>
+ <h3><code class="c_type">size_t</code> <code class="c_func">bit_capacity</code>() <code class="c_keyword">const</code></h3>
<dl>
<dt>Description</dt>
<dd>Returns the number of bits used internally by the Bloom filter..</dd>
@@ -159,6 +163,17 @@
</div>
<div class="func_ref">
+ <a name="empty"></a>
+ <h3><code class="c_type">bool</code> <code class="c_func">empty</code>() <code class="c_keyword">const</code></h3>
+ <dl>
+ <dt>Description</dt>
+ <dd>Returns true if no elements have been inserted into the Bloom filter.</dd>
+ <dt>Returns</dt>
+ <dd>std::bitset.count() == 0</dd>
+ </dl>
+ </div>
+
+ <div class="func_ref">
<a name="count"></a>
<h3><code class="c_type">size_t</code> <code class="c_func">count</code>() <code class="c_keyword">const</code></h3>
<dl>
@@ -169,6 +184,16 @@
</dl>
</div>
+ <div class="func_ref">
+ <a name="clear"></a>
+ <h3><code class="c_type">void</code> <code class="c_func">clear</code>()</h3>
+ <dl>
+ <dt>Description</dt>
+ <dd>Sets all bits in the Bloom filter to 0, effectively canceling all insertions.</dd>
+ <dt>Post-condition</dt>
+ <dd>this->count() == 0</dd>
+ </dl>
+ </div>
<div class="func_ref">
<a name="insert"></a>
@@ -183,8 +208,8 @@
<div class="func_ref">
- <a name="contains"></a>
- <h3><code class="c_type">bool</code> <code class="c_func">contains</code>(<code class="c_keyword">const</code> <code class="c_type">T&</code>) <code class="c_keyword">const</code></h3>
+ <a name="probably_contains"></a>
+ <h3><code class="c_type">bool</code> <code class="c_func">probably_contains</code>(<code class="c_keyword">const</code> <code class="c_type">T&</code>) <code class="c_keyword">const</code></h3>
<dl>
<dt>Description</dt>
<dd>Queries an element in the Bloom filter, checking each hash function value against the set bits.</dd>
Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/tutorial.html
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/tutorial.html (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/tutorial.html 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -51,7 +51,7 @@
<li>Basic functions:
<ul>
<li><code>void insert(const T&);</code></li>
- <li><code>bool contains(const T&);</code></li>
+ <li><code>bool probably_contains(const T&);</code></li>
</ul>
</li>
</ul>
@@ -81,7 +81,7 @@
}
<code class="c_keyword">for</code> (<code class="c_type">int</code><code class="c_id"> i</code> = INSERT_MAX; i < CONTAINS_MAX; ++i) {
- <code class="c_keyword">if</code> (bloom.contains(i)) cout << "collision" << endl;
+ <code class="c_keyword">if</code> (bloom.probably_contains(i)) cout << "collision" << endl;
}
cout << <code class="c_str">"collisions: "</code>
@@ -173,7 +173,7 @@
}
<code class="c_keyword">for</code> (<code class="c_type">int</code><code class="c_id"> i</code> = INSERT_MAX; i < CONTAINS_MAX; ++i) {
- <code class="c_keyword">if</code> (bloom.contains(i)) ++collisions;
+ <code class="c_keyword">if</code> (bloom.probably_contains(i)) ++collisions;
}
cout << <code class="c_str">"false positive rate: "</code>
@@ -197,7 +197,7 @@
<p>
All the details of insertion and querying the Bloom filter are handled
- transparently by the insert() and contains() functions, regardless
+ transparently by the insert() and probably_contains() functions, regardless
of the type, bit size, and hash functions used. This makes it very
convenient to experiment with different Bloom filter parameters in
projects you commit to.
Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/example/advanced_bloom.cpp
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/example/advanced_bloom.cpp (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/example/advanced_bloom.cpp 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -39,7 +39,7 @@
}
for (size_t i = INSERT_MAX; i < CONTAINS_MAX; ++i) {
- if (bloom.contains(i)) ++collisions;
+ if (bloom.probably_contains(i)) ++collisions;
}
cout << "false positive rate after inserts: "
Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/example/basic_bloom.cpp
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/example/basic_bloom.cpp (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/example/basic_bloom.cpp 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -29,7 +29,7 @@
}
for (size_t i = INSERT_MAX; i < CONTAINS_MAX; ++i) {
- if (bloom.contains(i)) ++collisions;
+ if (bloom.probably_contains(i)) ++collisions;
}
cout << "collisions: " << collisions << endl;
Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/example/custom_hash.cpp
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/example/custom_hash.cpp (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/example/custom_hash.cpp 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -71,7 +71,7 @@
}
for (size_t i = INSERT_MAX; i < CONTAINS_MAX; ++i) {
- if (bloom.contains(gen_url(i))) ++collisions;
+ if (bloom.probably_contains(gen_url(i))) ++collisions;
}
cout << "collisions: " << collisions << endl;
Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/example/string_bloom.cpp
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/example/string_bloom.cpp (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/example/string_bloom.cpp 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -43,7 +43,7 @@
}
for (size_t i = INSERT_MAX; i < CONTAINS_MAX; ++i) {
- if (bloom.contains(gen_string(i))) ++collisions;
+ if (bloom.probably_contains(gen_string(i))) ++collisions;
}
cout << "collisions: " << collisions << endl;
Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/example/url_bloom.cpp
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/example/url_bloom.cpp (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/example/url_bloom.cpp 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -45,7 +45,7 @@
}
for (size_t i = INSERT_MAX; i < CONTAINS_MAX; ++i) {
- if (bloom.contains(gen_url(i))) ++collisions;
+ if (bloom.probably_contains(gen_url(i))) ++collisions;
}
cout << "collisions: " << collisions << endl;
Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/test/regression.cpp
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/test/regression.cpp (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/test/regression.cpp 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -37,24 +37,34 @@
for (size_t i = 0; i < 200; ++i) {
bloom1.insert(i);
- BOOST_CHECK_EQUAL(bloom1.contains(i), true);
+ BOOST_CHECK_EQUAL(bloom1.probably_contains(i), true);
}
bloom2 = bloom1;
for (size_t i = 0; i < 200; ++i) {
- BOOST_CHECK_EQUAL(bloom2.contains(i), true);
+ BOOST_CHECK_EQUAL(bloom2.probably_contains(i), true);
}
}
-BOOST_AUTO_TEST_CASE(size) {
+BOOST_AUTO_TEST_CASE(bit_capacity) {
bloom_filter<size_t, 8> bloom_8;
bloom_filter<size_t, 256> bloom_256;
bloom_filter<size_t, 2048> bloom_2048;
- BOOST_CHECK_EQUAL(bloom_8.size(), 8ul);
- BOOST_CHECK_EQUAL(bloom_256.size(), 256ul);
- BOOST_CHECK_EQUAL(bloom_2048.size(), 2048ul);
+ BOOST_CHECK_EQUAL(bloom_8.bit_capacity(), 8ul);
+ BOOST_CHECK_EQUAL(bloom_256.bit_capacity(), 256ul);
+ BOOST_CHECK_EQUAL(bloom_2048.bit_capacity(), 2048ul);
+}
+
+BOOST_AUTO_TEST_CASE(empty) {
+ bloom_filter<size_t, 8> bloom;
+
+ BOOST_CHECK_EQUAL(bloom.empty(), true);
+ bloom.insert(1);
+ BOOST_CHECK_EQUAL(bloom.empty(), false);
+ bloom.clear();
+ BOOST_CHECK_EQUAL(bloom.empty(), true);
}
BOOST_AUTO_TEST_CASE(numHashFunctions) {
@@ -71,7 +81,7 @@
boost_hash<size_t, 6>,
boost_hash<size_t, 7> > > bloom_7;
- BOOST_CHECK_EQUAL(bloom_3.num_hash_functions(), 3ul);
+ BOOST_CHECK_EQUAL(bloom_3.num_hash_functions(), 1ul);
BOOST_CHECK_EQUAL(bloom_2.num_hash_functions(), 2ul);
BOOST_CHECK_EQUAL(bloom_7.num_hash_functions(), 7ul);
}
@@ -82,33 +92,35 @@
BOOST_CHECK_EQUAL(bloom.false_positive_rate(), 0.0);
bloom.insert(1);
- BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.002257625907, 0.0001);
+ BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.015504, .01);
bloom.insert(2);
- BOOST_CHECK_LT(bloom.false_positive_rate(), 0.014736);
+ BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.030768, .01);
bloom.insert(3);
- BOOST_CHECK_LT(bloom.false_positive_rate(), 0.040773);
+ BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.045794, .01);
bloom.insert(4);
- BOOST_CHECK_LT(bloom.false_positive_rate(), 0.0796276);
+ BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.060588, .01);
bloom.insert(5);
- BOOST_CHECK_LT(bloom.false_positive_rate(), 0.12877);
+ BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.075151, .01);
bloom.insert(6);
- BOOST_CHECK_LT(bloom.false_positive_rate(), 0.185102);
+ BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.089491, .01);
for (size_t i = 7; i < 5000; ++i)
bloom.insert(i);
+
+ BOOST_CHECK_GE(bloom.false_positive_rate(), 0.6);
BOOST_CHECK_LE(bloom.false_positive_rate(), 1.0);
}
-BOOST_AUTO_TEST_CASE(contains) {
+BOOST_AUTO_TEST_CASE(probably_contains) {
bloom_filter<size_t, 8> bloom;
bloom.insert(1);
- BOOST_CHECK_EQUAL(bloom.contains(1), true);
+ BOOST_CHECK_EQUAL(bloom.probably_contains(1), true);
BOOST_CHECK_LE(bloom.count(), 3ul);
BOOST_CHECK_GE(bloom.count(), 1ul);
}
@@ -116,7 +128,7 @@
BOOST_AUTO_TEST_CASE(doesNotContain) {
bloom_filter<size_t, 8> bloom;
- BOOST_CHECK_EQUAL(bloom.contains(1), false);
+ BOOST_CHECK_EQUAL(bloom.probably_contains(1), false);
}
BOOST_AUTO_TEST_CASE(insertNoFalseNegatives) {
@@ -124,7 +136,7 @@
for (size_t i = 0; i < 100; ++i) {
bloom.insert(i);
- BOOST_CHECK_EQUAL(bloom.contains(i), true);
+ BOOST_CHECK_EQUAL(bloom.probably_contains(i), true);
}
}
@@ -135,7 +147,7 @@
bloom.insert(i);
bloom.clear();
- BOOST_CHECK_EQUAL(bloom.contains(1), false);
+ BOOST_CHECK_EQUAL(bloom.probably_contains(1), false);
BOOST_CHECK_EQUAL(bloom.count(), 0ul);
}
@@ -153,7 +165,7 @@
bloom_union = bloom_1 | bloom_2;
for (size_t i = 0; i < 200; ++i)
- BOOST_CHECK_EQUAL(bloom_union.contains(i), true);
+ BOOST_CHECK_EQUAL(bloom_union.probably_contains(i), true);
BOOST_CHECK_GE(bloom_union.count(), bloom_1.count());
BOOST_CHECK_GE(bloom_union.count(), bloom_2.count());
}
@@ -168,7 +180,7 @@
bloom_union |= bloom_1;
for (size_t i = 0; i < 100; ++i)
- BOOST_CHECK_EQUAL(bloom_union.contains(i), true);
+ BOOST_CHECK_EQUAL(bloom_union.probably_contains(i), true);
BOOST_CHECK_EQUAL(bloom_union.count(), bloom_1.count());
}
@@ -188,7 +200,7 @@
BOOST_CHECK_LE(bloom_intersect.count(), bloom_1.count());
BOOST_CHECK_LE(bloom_intersect.count(), bloom_2.count());
- BOOST_CHECK_EQUAL(bloom_intersect.contains(100), true);
+ BOOST_CHECK_EQUAL(bloom_intersect.probably_contains(100), true);
}
BOOST_AUTO_TEST_CASE(testIntersectAssign) {
@@ -201,7 +213,7 @@
bloom_intersect &= bloom_1;
for (size_t i = 0; i < 100; ++i)
- BOOST_CHECK_EQUAL(bloom_intersect.contains(i), false);
+ BOOST_CHECK_EQUAL(bloom_intersect.probably_contains(i), false);
}
/*
@@ -225,7 +237,7 @@
std::cout << "bloom size " << bloom.size() << std::endl;
bloom.insert(INSERT_VAL);
for (size_t i = 0; i < SEARCH_SPACE; ++i) {
- if (bloom.contains(i) && i != INSERT_VAL) ++collisions;
+ if (bloom.probably_contains(i) && i != INSERT_VAL) ++collisions;
}
std::cout << collisions << " collisions" << std::endl;
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk