Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r71995 - in sandbox/bloom_filter/branches/prototype: . include test
From: cpp.cabrera_at_[hidden]
Date: 2011-05-16 15:28:51


Author: alejandro
Date: 2011-05-16 15:28:50 EDT (Mon, 16 May 2011)
New Revision: 71995
URL: http://svn.boost.org/trac/boost/changeset/71995

Log:
Restructuring. Implemented union, intersect. Decided that defaults for
constructors and assignment were sufficient.

Added:
   sandbox/bloom_filter/branches/prototype/include/
   sandbox/bloom_filter/branches/prototype/include/bloom.hpp (contents, props changed)
   sandbox/bloom_filter/branches/prototype/include/hash.hpp (contents, props changed)
   sandbox/bloom_filter/branches/prototype/test/
   sandbox/bloom_filter/branches/prototype/test/makefile (contents, props changed)
   sandbox/bloom_filter/branches/prototype/test/test_bloom.cpp (contents, props changed)
Removed:
   sandbox/bloom_filter/branches/prototype/bloom.hpp
   sandbox/bloom_filter/branches/prototype/hash.hpp
   sandbox/bloom_filter/branches/prototype/makefile
   sandbox/bloom_filter/branches/prototype/test_bloom.cpp

Deleted: sandbox/bloom_filter/branches/prototype/bloom.hpp
==============================================================================
--- sandbox/bloom_filter/branches/prototype/bloom.hpp 2011-05-16 15:28:50 EDT (Mon, 16 May 2011)
+++ (empty file)
@@ -1,102 +0,0 @@
-#ifndef _BLOOM_HPP
-#define _BLOOM_HPP 1
-
-#include <bitset>
-#include <tuple>
-#include "hash.hpp"
-
-template <size_t N,
- typename T,
- size_t Size,
- class HashFunctions>
-struct apply_hash
-{
- static void insert(const T& t, std::bitset<Size>& _bits) {
- typedef typename std::tuple_element<N, HashFunctions>::type Hash;
- _bits[Hash::hash(t) % Size] = true;
- apply_hash<N-1, T, Size, HashFunctions>::insert(t, _bits);
- }
-
- static bool contains(const T& t, const std::bitset<Size>& _bits) {
- typedef typename std::tuple_element<N, HashFunctions>::type Hash;
- return (_bits[Hash::hash(t) % Size] &&
- apply_hash<N-1, T, Size, HashFunctions>::contains(t, _bits));
- }
-};
-
-template <typename T,
- size_t Size,
- class HashFunctions>
-struct apply_hash<0, T, Size, HashFunctions>
-{
- static void insert(const T& t, std::bitset<Size>& _bits) {
- typedef typename std::tuple_element<0, HashFunctions>::type Hash;
- _bits[Hash::hash(t) % Size] = true;
- }
-
- static bool contains(const T& t, const std::bitset<Size>& _bits) {
- typedef typename std::tuple_element<0, HashFunctions>::type Hash;
- return (_bits[Hash::hash(t) % Size]);
- }
-};
-
-template <typename T,
- size_t Size,
- class HashFunctions = std::tuple<MurmurHash3<T, 3>,
- MurmurHash3<T, 5>,
- MurmurHash3<T, 7> > >
-class bloom_filter {
- typedef std::bitset<Size> Bitset;
-
-public:
- bloom_filter()
- {
- }
-
- // \todo: need to add compiler check for constexpr
- constexpr size_t size() const {
- return Size;
- }
-
- void insert(const T& t) {
- static const unsigned size = std::tuple_size<HashFunctions>::value - 1;
- apply_hash<size, T, Size, HashFunctions>::insert(t, bits);
- }
-
- bool contains(const T& t) const {
- static const unsigned size = std::tuple_size<HashFunctions>::value - 1;
- return apply_hash<size, T, Size, HashFunctions>::contains(t, bits);
- }
-
- bool operator[](const T& t) const {
- return this->contains(t);
- }
-
- void clear() {
- bits.reset();
- }
-
- bloom_filter(const bloom_filter&);
- bloom_filter& operator=(const bloom_filter& other);
-
- // \todo: need to add compiler check for rvalue references
- bloom_filter(const bloom_filter&&);
- bloom_filter& operator=(const bloom_filter&& other);
-
- bloom_filter& operator&=(const bloom_filter& rhs);
- bloom_filter& operator|=(const bloom_filter& rhs);
-
- template<class _T, size_t _Size, class _HashFunctions>
- friend bloom_filter<_T, _Size, _HashFunctions>&
- operator|(const bloom_filter<_T, _Size, _HashFunctions>& lhs,
- const bloom_filter<_T, _Size, _HashFunctions>& rhs);
-
- template<class _T, size_t _Size, class _HashFunctions>
- friend bloom_filter<_T, _Size, _HashFunctions>&
- operator&(const bloom_filter<_T, _Size, _HashFunctions>& lhs,
- const bloom_filter<_T, _Size, _HashFunctions>& rhs);
-
-private:
- std::bitset<Size> bits;
-};
-#endif

Deleted: sandbox/bloom_filter/branches/prototype/hash.hpp
==============================================================================
--- sandbox/bloom_filter/branches/prototype/hash.hpp 2011-05-16 15:28:50 EDT (Mon, 16 May 2011)
+++ (empty file)
@@ -1,98 +0,0 @@
-#ifndef _HASH_HPP_
-#define _HASH_HPP_ 1
-
-#include <cstdint>
-
-template <typename UnsignedIntT>
-inline UnsignedIntT rotl(const UnsignedIntT x, uint8_t r)
-{
- return (x << r) | (x >> (sizeof(UnsignedIntT) * 4 - r));
-}
-
-inline uint32_t rotl32(const uint32_t x, uint8_t r)
-{
- return rotl<uint32_t>(x,r);
-}
-
-inline uint64_t rotl64(const uint64_t x, uint8_t r)
-{
- return rotl<uint64_t>(x,r);
-}
-
-template <typename UnsignedIntT>
-inline UnsignedIntT get_block(const UnsignedIntT * p, const int i)
-{
- return p[i];
-}
-
-inline uint32_t fmix(uint32_t h)
-{
- h ^= h >> 16;
- h *= 0x85ebca6b;
- h ^= h >> 13;
- h *= 0xc2b2ae35;
- h ^= h >> 16;
-
- return h;
-}
-
-void murmurhash3(const void *const key, const int len,
- const uint32_t seed, void *out)
-{
- static const uint32_t c1 = 0xcc9e2d51;
- static const uint32_t c2 = 0x1b873593;
-
- const uint8_t *const data = static_cast<const uint8_t *const>(key);
-
- uint32_t h1 = seed;
- const uint32_t *const blocks =
- reinterpret_cast<const uint32_t *const >(data + len);
-
- for (int i = -(len/4); i; ++i) {
- uint32_t k1 = blocks[i];
-
- k1 *= c1;
- k1 = rotl32(k1, 15);
- k1 *= c2;
-
- h1 ^= k1;
- h1 = rotl32(h1, 13);
- h1 = h1*5 + 0xe6546b64;
- }
-
- const uint8_t *const tail =
- static_cast<const uint8_t *const>(data + len);
-
- uint32_t k1 = 0;
-
- switch (len & 3) {
- case 3:
- k1 ^= tail[2] << 16;
- case 2:
- k1 ^= tail[1] << 8;
- case 1:
- k1 ^= tail[0];
- k1 *= c1;
- k1 = rotl32(k1, 16);
- k1 *= c2;
- h1 ^= k1;
- }
-
- h1 ^= len;
- h1 = fmix(h1);
-
- *static_cast<uint32_t *>(out) = h1;
-}
-
-template <typename T, size_t Seed>
-struct MurmurHash3 {
- static size_t hash(const T& t) {
- size_t out = 0;
- murmurhash3(static_cast<const void *const>(&t),
- sizeof(t),
- Seed,
- &out);
- return out;
- }
-};
-#endif

Added: sandbox/bloom_filter/branches/prototype/include/bloom.hpp
==============================================================================
--- (empty file)
+++ sandbox/bloom_filter/branches/prototype/include/bloom.hpp 2011-05-16 15:28:50 EDT (Mon, 16 May 2011)
@@ -0,0 +1,121 @@
+#ifndef _BLOOM_HPP
+#define _BLOOM_HPP 1
+/**
+ * \author Alejandro Cabrera
+ * \brief A generic Bloom filter providing compile-time unrolling
+ * of hash function application.
+ */
+#include <boost/config.hpp>
+#include <bitset>
+#include <tuple>
+#include "hash.hpp"
+
+template <size_t N,
+ typename T,
+ size_t Size,
+ class HashFunctions>
+struct apply_hash
+{
+ static void insert(const T& t, std::bitset<Size>& _bits) {
+ typedef typename std::tuple_element<N, HashFunctions>::type Hash;
+ _bits[Hash::hash(t) % Size] = true;
+ apply_hash<N-1, T, Size, HashFunctions>::insert(t, _bits);
+ }
+
+ static bool contains(const T& t, const std::bitset<Size>& _bits) {
+ typedef typename std::tuple_element<N, HashFunctions>::type Hash;
+ return (_bits[Hash::hash(t) % Size] &&
+ apply_hash<N-1, T, Size, HashFunctions>::contains(t, _bits));
+ }
+};
+
+/**
+ * \todo: clean up tuples here, as well
+ */
+template <typename T,
+ size_t Size,
+ class HashFunctions>
+struct apply_hash<0, T, Size, HashFunctions>
+{
+ static void insert(const T& t, std::bitset<Size>& _bits) {
+ typedef typename std::tuple_element<0, HashFunctions>::type Hash;
+ _bits[Hash::hash(t) % Size] = true;
+ }
+
+ static bool contains(const T& t, const std::bitset<Size>& _bits) {
+ typedef typename std::tuple_element<0, HashFunctions>::type Hash;
+ return (_bits[Hash::hash(t) % Size]);
+ }
+};
+
+/**
+ * \todo: clean-up use of tuple here. Not all compilers support std::tuple
+ */
+template <typename T,
+ size_t Size,
+ class HashFunctions = std::tuple<MurmurHash3<T, 3>,
+ MurmurHash3<T, 5>,
+ MurmurHash3<T, 7> > >
+class bloom_filter {
+ typedef std::bitset<Size> Bitset;
+
+public:
+ bloom_filter() {}
+
+ // \todo: need to add compiler check for constexpr
+ BOOST_CONSTEXPR size_t size() const {
+ return Size;
+ }
+
+ void insert(const T& t) {
+ static const unsigned size = std::tuple_size<HashFunctions>::value - 1;
+ apply_hash<size, T, Size, HashFunctions>::insert(t, bits);
+ }
+
+ bool contains(const T& t) const {
+ static const unsigned size = std::tuple_size<HashFunctions>::value - 1;
+ return apply_hash<size, T, Size, HashFunctions>::contains(t, bits);
+ }
+
+ bool operator[](const T& t) const {
+ return this->contains(t);
+ }
+
+ void clear() {
+ this->bits.reset();
+ }
+
+ // \todo: need to add compiler check for rvalue references
+ bloom_filter(const bloom_filter&&);
+ bloom_filter& operator=(const bloom_filter&& other);
+
+ bloom_filter& operator|=(const bloom_filter& rhs) {
+ this->bits |= rhs.bits;
+ }
+
+ bloom_filter& operator&=(const bloom_filter& rhs) {
+ this->bits &= rhs.bits;
+ }
+
+ template<class _T, size_t _Size, class _HashFunctions>
+ friend bloom_filter<_T, _Size, _HashFunctions>&
+ operator|(const bloom_filter<_T, _Size, _HashFunctions>& lhs,
+ const bloom_filter<_T, _Size, _HashFunctions>& rhs)
+ {
+ bloom_filter<_T, _Size, _HashFunctions> ret = lhs;
+ return (ret |= rhs);
+ }
+
+ template<class _T, size_t _Size, class _HashFunctions>
+ friend bloom_filter<_T, _Size, _HashFunctions>&
+ operator&(const bloom_filter<_T, _Size, _HashFunctions>& lhs,
+ const bloom_filter<_T, _Size, _HashFunctions>& rhs)
+ {
+ bloom_filter<_T, _Size, _HashFunctions> ret = lhs;
+ return (ret &= rhs);
+ }
+
+private:
+ std::bitset<Size> bits;
+};
+#endif

Added: sandbox/bloom_filter/branches/prototype/include/hash.hpp
==============================================================================
--- (empty file)
+++ sandbox/bloom_filter/branches/prototype/include/hash.hpp 2011-05-16 15:28:50 EDT (Mon, 16 May 2011)
@@ -0,0 +1,104 @@
+#ifndef _HASH_HPP_
+#define _HASH_HPP_ 1
+/**
+ * \author Alejandro Cabrera
+ * \brief An alternative implentation of murmurhash3 for users
+ * not wishing to rely on the public domain Murmurhash3.
+ * \todo Hash many more collisions than public domain version of murmurhash3.
+ * \todo Provide 64-bit implementation of murmurhash3.
+ */
+#include <cstdint>
+
+template <typename UnsignedIntT>
+inline UnsignedIntT rotl(const UnsignedIntT x, uint8_t r)
+{
+ return (x << r) | (x >> (sizeof(UnsignedIntT) * 4 - r));
+}
+
+inline uint32_t rotl32(const uint32_t x, uint8_t r)
+{
+ return rotl<uint32_t>(x,r);
+}
+
+inline uint64_t rotl64(const uint64_t x, uint8_t r)
+{
+ return rotl<uint64_t>(x,r);
+}
+
+template <typename UnsignedIntT>
+inline UnsignedIntT get_block(const UnsignedIntT * p, const int i)
+{
+ return p[i];
+}
+
+inline uint32_t fmix(uint32_t h)
+{
+ h ^= h >> 16;
+ h *= 0x85ebca6b;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35;
+ h ^= h >> 16;
+
+ return h;
+}
+
+void murmurhash3(const void *const key, const int len,
+ const uint32_t seed, void *out)
+{
+ static const uint32_t c1 = 0xcc9e2d51;
+ static const uint32_t c2 = 0x1b873593;
+
+ const uint8_t *const data = static_cast<const uint8_t *const>(key);
+
+ uint32_t h1 = seed;
+ const uint32_t *const blocks =
+ reinterpret_cast<const uint32_t *const >(data + len);
+
+ for (int i = -(len/4); i; ++i) {
+ uint32_t k1 = blocks[i];
+
+ k1 *= c1;
+ k1 = rotl32(k1, 15);
+ k1 *= c2;
+
+ h1 ^= k1;
+ h1 = rotl32(h1, 13);
+ h1 = h1*5 + 0xe6546b64;
+ }
+
+ const uint8_t *const tail =
+ static_cast<const uint8_t *const>(data + len);
+
+ uint32_t k1 = 0;
+
+ switch (len & 3) {
+ case 3:
+ k1 ^= tail[2] << 16;
+ case 2:
+ k1 ^= tail[1] << 8;
+ case 1:
+ k1 ^= tail[0];
+ k1 *= c1;
+ k1 = rotl32(k1, 16);
+ k1 *= c2;
+ h1 ^= k1;
+ }
+
+ h1 ^= len;
+ h1 = fmix(h1);
+
+ *static_cast<uint32_t *>(out) = h1;
+}
+
+template <typename T, size_t Seed>
+struct MurmurHash3 {
+ static size_t hash(const T& t) {
+ size_t out = 0;
+ murmurhash3(static_cast<const void *const>(&t),
+ sizeof(t),
+ Seed,
+ &out);
+ return out;
+ }
+};
+#endif

Deleted: sandbox/bloom_filter/branches/prototype/makefile
==============================================================================
--- sandbox/bloom_filter/branches/prototype/makefile 2011-05-16 15:28:50 EDT (Mon, 16 May 2011)
+++ (empty file)
@@ -1,12 +0,0 @@
-CXXFLAGS := -Wall -Wextra -pedantic -std=c++0x -O3 -g
-ST_LIBS := lib/murmurhash3/MurmurHash3.o
-all : test_bloom
-
-%.o : %.cpp bloom.hpp hash.hpp
- $(CXX) $(CXXFLAGS) -c $<
-
-test_bloom : test_bloom.o bloom.hpp hash.hpp
- $(CXX) $(CXXFLAGS) -o $@ $< $(ST_LIBS)
-
-clean:
- rm -f *.o test_bloom

Added: sandbox/bloom_filter/branches/prototype/test/makefile
==============================================================================
--- (empty file)
+++ sandbox/bloom_filter/branches/prototype/test/makefile 2011-05-16 15:28:50 EDT (Mon, 16 May 2011)
@@ -0,0 +1,14 @@
+CXXFLAGS := -Wall -Wextra -pedantic -std=c++0x -O3 -g
+ST_LIBS := ../lib/murmurhash3/MurmurHash3.o
+INCLUDE_DIR := ../include
+INCLUDES := $(INCLUDE_DIR)/bloom.hpp $(INCLUDE_DIR)/hash.hpp
+all : test_bloom
+
+%.o : %.cpp $(INCLUDES)
+ $(CXX) $(CXXFLAGS) -c $<
+
+test_bloom : test_bloom.o $(INCLUDES)
+ $(CXX) $(CXXFLAGS) -o $@ $< $(ST_LIBS)
+
+clean:
+ rm -f *.o test_bloom

Added: sandbox/bloom_filter/branches/prototype/test/test_bloom.cpp
==============================================================================
--- (empty file)
+++ sandbox/bloom_filter/branches/prototype/test/test_bloom.cpp 2011-05-16 15:28:50 EDT (Mon, 16 May 2011)
@@ -0,0 +1,48 @@
+#include <iostream>
+#include "../lib/murmurhash3/MurmurHash3.h"
+#include "../include/bloom.hpp"
+
+typedef int BloomType;
+
+template <typename T, size_t Seed>
+struct OHash {
+ static size_t hash(const T& t) {
+ size_t out = 0;
+ MurmurHash3_x86_32(static_cast<const void *const>(&t),
+ sizeof(t),
+ Seed,
+ &out);
+ return out;
+ }
+};
+
+typedef std::tuple<OHash<BloomType, 2>,
+ OHash<BloomType, 3>,
+ OHash<BloomType, 5>,
+ OHash<BloomType, 7>,
+ OHash<BloomType, 11>,
+ OHash<BloomType, 13>,
+ OHash<BloomType, 17>,
+ OHash<BloomType, 19>> EightHashFunctions_O;
+
+//typedef std::tuple<MurmurHash3<BloomType, 19>> SingleHashFunction;
+
+int main()
+{
+ static const BloomType INSERT_VAL = 100;
+ static const size_t SEARCH_SPACE = 10000000;
+ static const size_t FILTER_SIZE = 64;
+ size_t collisions = 0;
+ bloom_filter<BloomType, FILTER_SIZE, EightHashFunctions_O> bloom;
+
+ std::cout << "bloom size " << sizeof(bloom) << std::endl;
+ bloom.insert(INSERT_VAL);
+ for (size_t i = 0; i < SEARCH_SPACE; ++i) {
+ if (bloom[i] && i != INSERT_VAL) ++collisions;
+ }
+
+ std::cout << collisions << " collisions" << std::endl;
+ bloom.clear();
+
+ return 0;
+}

Deleted: sandbox/bloom_filter/branches/prototype/test_bloom.cpp
==============================================================================
--- sandbox/bloom_filter/branches/prototype/test_bloom.cpp 2011-05-16 15:28:50 EDT (Mon, 16 May 2011)
+++ (empty file)
@@ -1,48 +0,0 @@
-#include <iostream>
-#include "lib/murmurhash3/MurmurHash3.h"
-#include "bloom.hpp"
-
-typedef int BloomType;
-
-template <typename T, size_t Seed>
-struct OHash {
- static size_t hash(const T& t) {
- size_t out = 0;
- MurmurHash3_x86_32(static_cast<const void *const>(&t),
- sizeof(t),
- Seed,
- &out);
- return out;
- }
-};
-
-typedef std::tuple<OHash<BloomType, 2>,
- OHash<BloomType, 3>,
- OHash<BloomType, 5>,
- OHash<BloomType, 7>,
- OHash<BloomType, 11>,
- OHash<BloomType, 13>,
- OHash<BloomType, 17>,
- OHash<BloomType, 19>> EightHashFunctions_O;
-
-//typedef std::tuple<MurmurHash3<BloomType, 19>> SingleHashFunction;
-
-int main()
-{
- static const BloomType INSERT_VAL = 100;
- static const size_t SEARCH_SPACE = 10000000;
- static const size_t FILTER_SIZE = 64;
- size_t collisions = 0;
- bloom_filter<BloomType, FILTER_SIZE, EightHashFunctions_O> bloom;
-
- std::cout << "bloom size " << sizeof(bloom) << std::endl;
- bloom.insert(INSERT_VAL);
- for (size_t i = 0; i < SEARCH_SPACE; ++i) {
- if (bloom[i] && i != INSERT_VAL) ++collisions;
- }
-
- std::cout << collisions << " collisions" << std::endl;
- bloom.clear();
-
- return 0;
-}


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk