Boost logo

Boost-Commit :

From: franklin.jonathan_at_[hidden]
Date: 2008-05-08 13:40:52


Author: jfranklin
Date: 2008-05-08 13:40:52 EDT (Thu, 08 May 2008)
New Revision: 45221
URL: http://svn.boost.org/trac/boost/changeset/45221

Log:
Moved the cluster_data template into its own header.
Cleaned up the code slightly, and added some doc comments.

Added:
   sandbox/boost/algorithm/cluster/cluster_data.hpp (contents, props changed)
Text files modified:
   sandbox/boost/algorithm/cluster/dbscan.hpp | 80 ++++++++++++---------------------------
   1 files changed, 25 insertions(+), 55 deletions(-)

Added: sandbox/boost/algorithm/cluster/cluster_data.hpp
==============================================================================
--- (empty file)
+++ sandbox/boost/algorithm/cluster/cluster_data.hpp 2008-05-08 13:40:52 EDT (Thu, 08 May 2008)
@@ -0,0 +1,63 @@
+#if ! defined BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
+#define BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
+
+#include <boost/shared_ptr.hpp>
+#include <vector>
+
+namespace boost
+{
+namespace algorithm
+{
+namespace cluster
+{
+
+/*! TODO: Document this type.
+ */
+template<typename Cluster>
+struct cluster_data
+{
+ typedef Cluster value_type;
+ typedef std::vector<value_type> clusters;
+ cluster_data() : m_pClusters(new clusters) {}
+ ~cluster_data() {}
+
+ cluster_data(cluster_data const & c) : m_pClusters(c.m_pClusters) {}
+ cluster_data const & cluster_data::operator=(cluster_data const & rhs)
+ { m_pClusters = rhs.m_pClusters; }
+
+ typedef typename clusters::iterator iterator;
+ typedef typename clusters::const_iterator const_iterator;
+ typedef typename clusters::reverse_iterator reverse_iterator;
+
+ iterator begin() { return m_pClusters->begin(); }
+ iterator end() { return m_pClusters->end(); }
+
+ const_iterator begin() const { return m_pClusters->begin(); }
+ const_iterator end() const { return m_pClusters->end(); }
+
+ iterator rbegin() { return m_pClusters->rbegin(); }
+ iterator rend() { return m_pClusters->rend(); }
+
+ iterator insert(iterator loc, value_type const & val)
+ { return m_pClusters->insert(loc, val); }
+
+ void push_back(value_type const & v) { m_pClusters->push_back(v); }
+ void pop_back() { m_pClusters->pop_back(); }
+
+ value_type & back() { return m_pClusters->back(); }
+ value_type const & back() const { return m_pClusters->back(); }
+
+private:
+ boost::shared_ptr<clusters> m_pClusters;
+};
+
+} // End of namespace cluster
+
+// TODO: Should we be exporting this?
+using namespace cluster;
+
+} // End of namespace algorithm
+
+} // End of namespace boost
+
+#endif // BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP

Modified: sandbox/boost/algorithm/cluster/dbscan.hpp
==============================================================================
--- sandbox/boost/algorithm/cluster/dbscan.hpp (original)
+++ sandbox/boost/algorithm/cluster/dbscan.hpp 2008-05-08 13:40:52 EDT (Thu, 08 May 2008)
@@ -1,11 +1,8 @@
 #if ! defined BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP
 #define BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP
 
-#include <boost/range/begin.hpp>
-#include <boost/range/end.hpp>
-#include <boost/shared_ptr.hpp>
+#include <boost/algorithm/cluster/cluster_data.hpp>
 #include <vector>
-#include <list>
 
 namespace boost
 {
@@ -18,13 +15,13 @@
 {
 
 // TODO: Replace this naive query function w/ R*-tree or fractional cascading.
-// It makes the runtime quadratic.
+// This query mechanism makes the runtime quadratic.
 template<typename NTupleIter, typename DistFun>
 static void query(
   NTupleIter const & query_pt,
   NTupleIter const & begin,
   NTupleIter const & end,
- float eps,
+ typename NTupleIter::difference_type eps,
   DistFun const & d,
   std::vector<NTupleIter> & v)
 {
@@ -40,7 +37,7 @@
   }
 }
 
-// TODO: Replace this so we don't have to store the cluster info for each tuple.
+// TODO: Replace this so we don't have to store the cluster info for each tuple?
 template<typename NTupleIter>
 struct node
 {
@@ -52,46 +49,14 @@
 
 } // End of namespace detail.
 
-// TODO: Document this type.
-template<typename Cluster>
-struct cluster_data
-{
- typedef Cluster value_type;
- typedef std::vector<value_type> clusters;
- cluster_data() : m_pClusters(new clusters) {}
- ~cluster_data() {}
-
- cluster_data(cluster_data const & c) : m_pClusters(c.m_pClusters) {}
- cluster_data const & cluster_data::operator=(cluster_data const & rhs)
- { m_pClusters = rhs.m_pClusters; }
-
- typedef typename clusters::iterator iterator;
- typedef typename clusters::const_iterator const_iterator;
- typedef typename clusters::reverse_iterator reverse_iterator;
-
- iterator begin() { return m_pClusters->begin(); }
- iterator end() { return m_pClusters->end(); }
-
- const_iterator begin() const { return m_pClusters->begin(); }
- const_iterator end() const { return m_pClusters->end(); }
-
- iterator rbegin() { return m_pClusters->rbegin(); }
- iterator rend() { return m_pClusters->rend(); }
-
- iterator insert(iterator loc, value_type const & val)
- { return m_pClusters->insert(loc, val); }
-
- void push_back(value_type const & v) { m_pClusters->push_back(v); }
- void pop_back() { m_pClusters->pop_back(); }
-
- value_type & back() { return m_pClusters->back(); }
- value_type const & back() const { return m_pClusters->back(); }
-
-private:
- boost::shared_ptr<clusters> m_pClusters;
-};
-
-/**
+/*! DBSCAN density-based clustering algorithm.
+ * TODO: Document this function.
+ * \param[in] begin
+ * \param[in] end
+ * \param[in] eps
+ * \param[in] min_points
+ * \param[in] d
+ * \return The cluster data (partitioning of the tuples).
  */
 template<typename Cluster, typename NTupleIter, typename DistFun>
 cluster_data<Cluster>
@@ -101,7 +66,10 @@
        size_t min_points,
        DistFun const & d)
 {
- // TODO: Rework the algorithm to NOT make this extra collection.
+ int const UNCLASSIFIED = -1;
+ int const NOISE = 0;
+
+ // TODO: Rework the algorithm to NOT make this extra collection?
   typedef detail::node<NTupleIter> node;
   typedef std::vector<node> ntuple_nodes;
   ntuple_nodes tuples;
@@ -111,24 +79,25 @@
   for(NTupleIter it = begin; it != end; ++it)
   {
     //++num_elems;
- //it->cluster = UNCLASSIFIED;
     tuples.push_back(node(it));
   }
 
   typedef cluster_data<std::vector<NTupleIter> > cluster_data;
   cluster_data p;
 
- // Do it...
+ // TODO: We should try to make cluster_num go away.
   int cluster_num = 0;
   for(ntuple_nodes::iterator it = tuples.begin(); it != tuples.end(); ++it)
   {
- if (it->cluster != UNCLASSIFIED) // Been classified.
+ // Skip this tuple if its already been classified as a cluster or noise.
+ if (it->cluster != UNCLASSIFIED)
       continue;
 
     // Expand cluster.
 
     std::vector<ntuple_nodes::iterator> seeds;
     detail::query(it, tuples.begin(), tuples.end(), eps, d, seeds);
+ // If the neighborhood of this tuple is too small, then mark it as noise.
     if (seeds.size() < min_points)
     {
       it->cluster = NOISE;
@@ -137,20 +106,20 @@
 
     // Start the next cluster.
     ++cluster_num;
- p.push_back(Cluster());
+ p.push_back(Cluster()); // TODO: This is goofy.
     Cluster & cur_cluster = p.back();
 
- // Mark entire neighborhood as part of current cluster.
+ // Mark entire neighborhood as part of the current cluster.
     it->cluster = cluster_num;
     cur_cluster.push_back(it->tuple);
- // TODO: Remove it from noise.
     for (size_t n = 0; n < seeds.size(); ++n)
     {
       seeds[n]->cluster = cluster_num;
       cur_cluster.push_back(seeds[n]->tuple);
- // TODO: Remove it from noise.
     }
 
+ // Keep adding seeds and processing them until we find all points that
+ // are Density Reachable.
     while (! seeds.empty())
     {
       ntuple_nodes::iterator cur = seeds.back();
@@ -181,6 +150,7 @@
 
 } // End of namespace cluster
 
+// TODO: Should we be exporting this?
 using namespace cluster;
 
 } // End of namespace algorithm


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk