Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r72720 - in sandbox/bloom_filter/trunk/libs/bloom_filter: doc/html doc/html/reference example test
From: cpp.cabrera_at_[hidden]
Date: 2011-06-22 18:17:50


Author: alejandro
Date: 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
New Revision: 72720
URL: http://svn.boost.org/trac/boost/changeset/72720

Log:
Added empty(). Changed contains() -> probably_contains(). Made relevant documentation, test suite, and example updates.
Text files modified:
   sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/history.html | 11 +++++++
   sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/index.html | 2
   sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/reference/bloom.html | 39 ++++++++++++++++++++++----
   sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/tutorial.html | 8 ++--
   sandbox/bloom_filter/trunk/libs/bloom_filter/example/advanced_bloom.cpp | 2
   sandbox/bloom_filter/trunk/libs/bloom_filter/example/basic_bloom.cpp | 2
   sandbox/bloom_filter/trunk/libs/bloom_filter/example/custom_hash.cpp | 2
   sandbox/bloom_filter/trunk/libs/bloom_filter/example/string_bloom.cpp | 2
   sandbox/bloom_filter/trunk/libs/bloom_filter/example/url_bloom.cpp | 2
   sandbox/bloom_filter/trunk/libs/bloom_filter/test/regression.cpp | 58 ++++++++++++++++++++++++---------------
   10 files changed, 88 insertions(+), 40 deletions(-)

Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/history.html
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/history.html (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/history.html 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -36,6 +36,17 @@
     </div>
 
     <h1 class="title">Version history</h1>
+ <h3>v1.0.1</h3>
+ <ul>
+ <li><strong>API Changes</strong>
+ <ul>
+ <li>contains() -> probably_contains()</li>
+ <li>Added empty()</li>
+ </ul>
+ </li>
+ </ul>
+
+ </ul>
     <h2>v1.0</h2>
     <ul>
       <li>Initial release.</li>

Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/index.html
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/index.html (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/index.html 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -72,7 +72,7 @@
 
     <div>
       <p>
- Last revised: <time datetime="2011-06-20">June 20, 2011</time>.
+ Last revised: <time datetime="2011-06-22">June 22, 2011</time>.
       </p>
 
       <p class="copyright">

Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/reference/bloom.html
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/reference/bloom.html (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/reference/bloom.html 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -54,10 +54,12 @@
         <code class="c_func">bloom_filter</code>();
 
         <code class="c_comment">// data structure metadata query functions</code>
- <code class="c_keyword">static constexpr</code> <code class="c_type">size_t</code> <code class="c_func">size</code>() <code class="c_keyword">const</code>;
+ <code class="c_keyword">static constexpr</code> <code class="c_type">size_t</code> <code class="c_func">bit_capacity</code>() <code class="c_keyword">const</code>;
         <code class="c_keyword">static constexpr</code> <code class="c_type">size_t</code> <code class="c_func">num_hash_functions</code>() <code class="c_keyword">const</code>;
         <code class="c_type">double</code> <code class="c_func">false_positive_rate</code>() <code class="c_keyword">const</code>;
         <code class="c_type">size_t</code> <code class="c_func">count</code>() <code class="c_keyword">const</code>;
+ <code class="c_type">bool</code> <code class="c_func">empty</code>() <code class="c_keyword">const</code>;
+ <code class="c_type">void</code> <code class="c_func">clear</code>()</code>;
 
         <code class="c_comment">// data structures core ops</code>
         <code class="c_type">void</code> <code class="c_func">insert</code>(<code class="c_keyword">const</code> <code class="c_type">T</code>&amp;);
@@ -100,12 +102,14 @@
       <p>Function Reference</p>
       <ul>
         <li>bloom_filter()</li>
- <li>size()</li>
+ <li>bit_capacity()</li>
         <li>num_hash_functions()</li>
         <li>false_positive_rate()</li>
+ <li>empty()</li>
         <li>count()</li>
+ <li>clear()</li>
         <li>insert()</li>
- <li>contains()</li>
+ <li>probably_contains()</li>
         <li>operator|=()</li>
         <li>operator&=()</li>
         <li>operator|()</li>
@@ -125,8 +129,8 @@
     </div>
 
     <div class="func_ref">
- <a name="size"></a>
- <h3><code class="c_type">size_t</code> <code class="c_func">size</code>() <code class="c_keyword">const</code></h3>
+ <a name="bit_capacity"></a>
+ <h3><code class="c_type">size_t</code> <code class="c_func">bit_capacity</code>() <code class="c_keyword">const</code></h3>
       <dl>
         <dt>Description</dt>
         <dd>Returns the number of bits used internally by the Bloom filter..</dd>
@@ -159,6 +163,17 @@
     </div>
 
     <div class="func_ref">
+ <a name="empty"></a>
+ <h3><code class="c_type">bool</code> <code class="c_func">empty</code>() <code class="c_keyword">const</code></h3>
+ <dl>
+ <dt>Description</dt>
+ <dd>Returns true if no elements have been inserted into the Bloom filter.</dd>
+ <dt>Returns</dt>
+ <dd>std::bitset.count() == 0</dd>
+ </dl>
+ </div>
+
+ <div class="func_ref">
       <a name="count"></a>
       <h3><code class="c_type">size_t</code> <code class="c_func">count</code>() <code class="c_keyword">const</code></h3>
       <dl>
@@ -169,6 +184,16 @@
       </dl>
     </div>
 
+ <div class="func_ref">
+ <a name="clear"></a>
+ <h3><code class="c_type">void</code> <code class="c_func">clear</code>()</h3>
+ <dl>
+ <dt>Description</dt>
+ <dd>Sets all bits in the Bloom filter to 0, effectively canceling all insertions.</dd>
+ <dt>Post-condition</dt>
+ <dd>this->count() == 0</dd>
+ </dl>
+ </div>
 
     <div class="func_ref">
       <a name="insert"></a>
@@ -183,8 +208,8 @@
 
 
     <div class="func_ref">
- <a name="contains"></a>
- <h3><code class="c_type">bool</code> <code class="c_func">contains</code>(<code class="c_keyword">const</code> <code class="c_type">T&amp;</code>) <code class="c_keyword">const</code></h3>
+ <a name="probably_contains"></a>
+ <h3><code class="c_type">bool</code> <code class="c_func">probably_contains</code>(<code class="c_keyword">const</code> <code class="c_type">T&amp;</code>) <code class="c_keyword">const</code></h3>
       <dl>
         <dt>Description</dt>
         <dd>Queries an element in the Bloom filter, checking each hash function value against the set bits.</dd>

Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/tutorial.html
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/tutorial.html (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/doc/html/tutorial.html 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -51,7 +51,7 @@
       <li>Basic functions:
       <ul>
         <li><code>void insert(const T&amp;);</code></li>
- <li><code>bool contains(const T&amp;);</code></li>
+ <li><code>bool probably_contains(const T&amp;);</code></li>
       </ul>
       </li>
     </ul>
@@ -81,7 +81,7 @@
         }
 
         <code class="c_keyword">for</code> (<code class="c_type">int</code><code class="c_id"> i</code> = INSERT_MAX; i &lt; CONTAINS_MAX; ++i) {
- <code class="c_keyword">if</code> (bloom.contains(i)) cout &lt;&lt; "collision" &lt;&lt; endl;
+ <code class="c_keyword">if</code> (bloom.probably_contains(i)) cout &lt;&lt; "collision" &lt;&lt; endl;
         }
 
         cout &lt;&lt; <code class="c_str">"collisions: "</code>
@@ -173,7 +173,7 @@
         }
 
         <code class="c_keyword">for</code> (<code class="c_type">int</code><code class="c_id"> i</code> = INSERT_MAX; i &lt; CONTAINS_MAX; ++i) {
- <code class="c_keyword">if</code> (bloom.contains(i)) ++collisions;
+ <code class="c_keyword">if</code> (bloom.probably_contains(i)) ++collisions;
         }
 
         cout &lt;&lt; <code class="c_str">"false positive rate: "</code>
@@ -197,7 +197,7 @@
 
     <p>
       All the details of insertion and querying the Bloom filter are handled
- transparently by the insert() and contains() functions, regardless
+ transparently by the insert() and probably_contains() functions, regardless
       of the type, bit size, and hash functions used. This makes it very
       convenient to experiment with different Bloom filter parameters in
       projects you commit to.

Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/example/advanced_bloom.cpp
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/example/advanced_bloom.cpp (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/example/advanced_bloom.cpp 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -39,7 +39,7 @@
   }
 
   for (size_t i = INSERT_MAX; i < CONTAINS_MAX; ++i) {
- if (bloom.contains(i)) ++collisions;
+ if (bloom.probably_contains(i)) ++collisions;
   }
 
   cout << "false positive rate after inserts: "

Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/example/basic_bloom.cpp
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/example/basic_bloom.cpp (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/example/basic_bloom.cpp 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -29,7 +29,7 @@
   }
 
   for (size_t i = INSERT_MAX; i < CONTAINS_MAX; ++i) {
- if (bloom.contains(i)) ++collisions;
+ if (bloom.probably_contains(i)) ++collisions;
   }
 
   cout << "collisions: " << collisions << endl;

Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/example/custom_hash.cpp
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/example/custom_hash.cpp (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/example/custom_hash.cpp 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -71,7 +71,7 @@
   }
 
   for (size_t i = INSERT_MAX; i < CONTAINS_MAX; ++i) {
- if (bloom.contains(gen_url(i))) ++collisions;
+ if (bloom.probably_contains(gen_url(i))) ++collisions;
   }
 
   cout << "collisions: " << collisions << endl;

Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/example/string_bloom.cpp
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/example/string_bloom.cpp (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/example/string_bloom.cpp 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -43,7 +43,7 @@
   }
 
   for (size_t i = INSERT_MAX; i < CONTAINS_MAX; ++i) {
- if (bloom.contains(gen_string(i))) ++collisions;
+ if (bloom.probably_contains(gen_string(i))) ++collisions;
   }
 
   cout << "collisions: " << collisions << endl;

Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/example/url_bloom.cpp
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/example/url_bloom.cpp (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/example/url_bloom.cpp 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -45,7 +45,7 @@
   }
 
   for (size_t i = INSERT_MAX; i < CONTAINS_MAX; ++i) {
- if (bloom.contains(gen_url(i))) ++collisions;
+ if (bloom.probably_contains(gen_url(i))) ++collisions;
   }
 
   cout << "collisions: " << collisions << endl;

Modified: sandbox/bloom_filter/trunk/libs/bloom_filter/test/regression.cpp
==============================================================================
--- sandbox/bloom_filter/trunk/libs/bloom_filter/test/regression.cpp (original)
+++ sandbox/bloom_filter/trunk/libs/bloom_filter/test/regression.cpp 2011-06-22 18:17:48 EDT (Wed, 22 Jun 2011)
@@ -37,24 +37,34 @@
 
   for (size_t i = 0; i < 200; ++i) {
     bloom1.insert(i);
- BOOST_CHECK_EQUAL(bloom1.contains(i), true);
+ BOOST_CHECK_EQUAL(bloom1.probably_contains(i), true);
   }
 
   bloom2 = bloom1;
 
   for (size_t i = 0; i < 200; ++i) {
- BOOST_CHECK_EQUAL(bloom2.contains(i), true);
+ BOOST_CHECK_EQUAL(bloom2.probably_contains(i), true);
   }
 }
 
-BOOST_AUTO_TEST_CASE(size) {
+BOOST_AUTO_TEST_CASE(bit_capacity) {
   bloom_filter<size_t, 8> bloom_8;
   bloom_filter<size_t, 256> bloom_256;
   bloom_filter<size_t, 2048> bloom_2048;
   
- BOOST_CHECK_EQUAL(bloom_8.size(), 8ul);
- BOOST_CHECK_EQUAL(bloom_256.size(), 256ul);
- BOOST_CHECK_EQUAL(bloom_2048.size(), 2048ul);
+ BOOST_CHECK_EQUAL(bloom_8.bit_capacity(), 8ul);
+ BOOST_CHECK_EQUAL(bloom_256.bit_capacity(), 256ul);
+ BOOST_CHECK_EQUAL(bloom_2048.bit_capacity(), 2048ul);
+}
+
+BOOST_AUTO_TEST_CASE(empty) {
+ bloom_filter<size_t, 8> bloom;
+
+ BOOST_CHECK_EQUAL(bloom.empty(), true);
+ bloom.insert(1);
+ BOOST_CHECK_EQUAL(bloom.empty(), false);
+ bloom.clear();
+ BOOST_CHECK_EQUAL(bloom.empty(), true);
 }
 
 BOOST_AUTO_TEST_CASE(numHashFunctions) {
@@ -71,7 +81,7 @@
     boost_hash<size_t, 6>,
     boost_hash<size_t, 7> > > bloom_7;
 
- BOOST_CHECK_EQUAL(bloom_3.num_hash_functions(), 3ul);
+ BOOST_CHECK_EQUAL(bloom_3.num_hash_functions(), 1ul);
   BOOST_CHECK_EQUAL(bloom_2.num_hash_functions(), 2ul);
   BOOST_CHECK_EQUAL(bloom_7.num_hash_functions(), 7ul);
 }
@@ -82,33 +92,35 @@
   BOOST_CHECK_EQUAL(bloom.false_positive_rate(), 0.0);
 
   bloom.insert(1);
- BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.002257625907, 0.0001);
+ BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.015504, .01);
 
   bloom.insert(2);
- BOOST_CHECK_LT(bloom.false_positive_rate(), 0.014736);
+ BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.030768, .01);
 
   bloom.insert(3);
- BOOST_CHECK_LT(bloom.false_positive_rate(), 0.040773);
+ BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.045794, .01);
 
   bloom.insert(4);
- BOOST_CHECK_LT(bloom.false_positive_rate(), 0.0796276);
+ BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.060588, .01);
 
   bloom.insert(5);
- BOOST_CHECK_LT(bloom.false_positive_rate(), 0.12877);
+ BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.075151, .01);
 
   bloom.insert(6);
- BOOST_CHECK_LT(bloom.false_positive_rate(), 0.185102);
+ BOOST_CHECK_CLOSE(bloom.false_positive_rate(), 0.089491, .01);
 
   for (size_t i = 7; i < 5000; ++i)
     bloom.insert(i);
+
+ BOOST_CHECK_GE(bloom.false_positive_rate(), 0.6);
   BOOST_CHECK_LE(bloom.false_positive_rate(), 1.0);
 }
 
-BOOST_AUTO_TEST_CASE(contains) {
+BOOST_AUTO_TEST_CASE(probably_contains) {
   bloom_filter<size_t, 8> bloom;
 
   bloom.insert(1);
- BOOST_CHECK_EQUAL(bloom.contains(1), true);
+ BOOST_CHECK_EQUAL(bloom.probably_contains(1), true);
   BOOST_CHECK_LE(bloom.count(), 3ul);
   BOOST_CHECK_GE(bloom.count(), 1ul);
 }
@@ -116,7 +128,7 @@
 BOOST_AUTO_TEST_CASE(doesNotContain) {
   bloom_filter<size_t, 8> bloom;
 
- BOOST_CHECK_EQUAL(bloom.contains(1), false);
+ BOOST_CHECK_EQUAL(bloom.probably_contains(1), false);
 }
 
 BOOST_AUTO_TEST_CASE(insertNoFalseNegatives) {
@@ -124,7 +136,7 @@
 
   for (size_t i = 0; i < 100; ++i) {
     bloom.insert(i);
- BOOST_CHECK_EQUAL(bloom.contains(i), true);
+ BOOST_CHECK_EQUAL(bloom.probably_contains(i), true);
   }
 }
 
@@ -135,7 +147,7 @@
     bloom.insert(i);
 
   bloom.clear();
- BOOST_CHECK_EQUAL(bloom.contains(1), false);
+ BOOST_CHECK_EQUAL(bloom.probably_contains(1), false);
   BOOST_CHECK_EQUAL(bloom.count(), 0ul);
 }
 
@@ -153,7 +165,7 @@
   bloom_union = bloom_1 | bloom_2;
 
   for (size_t i = 0; i < 200; ++i)
- BOOST_CHECK_EQUAL(bloom_union.contains(i), true);
+ BOOST_CHECK_EQUAL(bloom_union.probably_contains(i), true);
   BOOST_CHECK_GE(bloom_union.count(), bloom_1.count());
   BOOST_CHECK_GE(bloom_union.count(), bloom_2.count());
 }
@@ -168,7 +180,7 @@
   bloom_union |= bloom_1;
 
   for (size_t i = 0; i < 100; ++i)
- BOOST_CHECK_EQUAL(bloom_union.contains(i), true);
+ BOOST_CHECK_EQUAL(bloom_union.probably_contains(i), true);
   BOOST_CHECK_EQUAL(bloom_union.count(), bloom_1.count());
 }
 
@@ -188,7 +200,7 @@
 
   BOOST_CHECK_LE(bloom_intersect.count(), bloom_1.count());
   BOOST_CHECK_LE(bloom_intersect.count(), bloom_2.count());
- BOOST_CHECK_EQUAL(bloom_intersect.contains(100), true);
+ BOOST_CHECK_EQUAL(bloom_intersect.probably_contains(100), true);
 }
 
 BOOST_AUTO_TEST_CASE(testIntersectAssign) {
@@ -201,7 +213,7 @@
   bloom_intersect &= bloom_1;
 
   for (size_t i = 0; i < 100; ++i)
- BOOST_CHECK_EQUAL(bloom_intersect.contains(i), false);
+ BOOST_CHECK_EQUAL(bloom_intersect.probably_contains(i), false);
 }
 
 /*
@@ -225,7 +237,7 @@
   std::cout << "bloom size " << bloom.size() << std::endl;
   bloom.insert(INSERT_VAL);
   for (size_t i = 0; i < SEARCH_SPACE; ++i) {
- if (bloom.contains(i) && i != INSERT_VAL) ++collisions;
+ if (bloom.probably_contains(i) && i != INSERT_VAL) ++collisions;
   }
 
   std::cout << collisions << " collisions" << std::endl;


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk