Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r56480 - in sandbox/statistics/goodness_of_fit: boost/statistics/goodness_of_fit/algorithm boost/statistics/goodness_of_fit/data libs/statistics/goodness_of_fit/doc
From: erwann.rogard_at_[hidden]
Date: 2009-09-29 19:10:44


Author: e_r
Date: 2009-09-29 19:10:44 EDT (Tue, 29 Sep 2009)
New Revision: 56480
URL: http://svn.boost.org/trac/boost/changeset/56480

Log:
m
Text files modified:
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/k_fold_estimate.hpp | 48 +++++++++++++++++++++++++++++++++++++++
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/data/k_fold_data.hpp | 33 +++++++++++++++-----------
   sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/doc/readme.txt | 12 +++++++++
   3 files changed, 77 insertions(+), 16 deletions(-)

Modified: sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/k_fold_estimate.hpp
==============================================================================
--- sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/k_fold_estimate.hpp (original)
+++ sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/k_fold_estimate.hpp 2009-09-29 19:10:44 EDT (Tue, 29 Sep 2009)
@@ -32,7 +32,6 @@
         BOOST_CONCEPT_ASSERT((
             estimator_concept::TrainableEstimator<E,range_>
         ));
-
 
         C caller;
         while(kfd.j()<kfd.k()){
@@ -52,13 +51,60 @@
                     t,
                     *i
                 );
+
                 ++i;
             }
             kfd.increment();
         }
     }
+
+
+ // D supplies operator()(t), mapping a test point t to the argument given to the estimator. NOTE(review): D is not declared in the visible lines - presumably a template parameter; confirm.
+ struct k_fold_estimate_crtp : D{ // inherits from D so that *this can be viewed as a D (see the cast below)
+
+ template<typename I,typename T,typename E,typename It>
+ void operator()(
+ k_fold_data<T>& kfd,
+ E estimator,
+ It i, // Output iterator: receives one estimate per test data point. NOTE(review): trailing comma before ')'.
+ ){
+ typedef k_fold_data<T> k_fold_;
+ typedef typename k_fold_::range_train_data_type range_;
+ BOOST_CONCEPT_ASSERT((
+ estimator_concept::TrainableEstimator<E,range_>
+ ));
+
+ while(kfd.j()<kfd.k()){ // one pass per partition, until kfd.j() reaches kfd.k()
+ range_ range(
+ boost::begin( kfd.train_data() ),
+ boost::end( kfd.train_data() )
+ );
+
+ train( // train the estimator on the current training range
+ estimator,
+ range
+ );
+ typedef const D& derived_;
+
+ derived_ d = static_cast<derived_>(*this); // view *this as its base D in order to call D's operator()
+
+ BOOST_FOREACH(const T& t, kfd.test_data())
+ {
+
+ *i = statistics::estimate( // write the estimate for t through the output iterator
+ e, // NOTE(review): `e` is not declared in this function; the parameter is named `estimator` - confirm
+ d(t)
+ );
+
+ ++i;
+ }
+ kfd.increment(); // move on to the next train/test partition
+ }
+ }
+ };
     
 }// goodness_of_fit
+}// detail
 }// statistics
 }// boost
 

Modified: sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/data/k_fold_data.hpp
==============================================================================
--- sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/data/k_fold_data.hpp (original)
+++ sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/data/k_fold_data.hpp 2009-09-29 19:10:44 EDT (Tue, 29 Sep 2009)
@@ -20,12 +20,15 @@
 #include <boost/circular_buffer.hpp>
 #include <boost/iterator/iterator_traits.hpp>
 
-#include <boost/binary_op/meta/include.hpp>
+//#include <boost/binary_op/meta/include.hpp>
+#include <boost/statistics/detail.hpp>
 
 namespace boost{
 namespace statistics{
+namespace detail{// Added
 namespace goodness_of_fit{
 
+
         // k = 4
         // train test
         // [0][1][2] (3) j = 3
@@ -35,7 +38,8 @@
         // [3][0][1] (2) j = 2
         // [0][1][2] (3) j = 3
 
- // If regression, T == tuple
+ // Examples:
+ // If regression, T == tuple
     // If marginal, T == Y
     template<typename T>
     class k_fold_data{
@@ -59,16 +63,17 @@
         typedef boost::circular_buffer<T> train_data_type;
         typedef std::vector<T> test_data_type;
         typedef long int int_;
- typedef boost::sub_range<const train_data_type> range_train_data_type;
+ typedef boost::sub_range<const train_data_type>
+ range_train_data_type;
 
         k_fold_data();
         template<typename It>
- k_fold_data(int_ k, It b_xy,It e_xy);
+ k_fold_data(int_ k, It b,It e);
         k_fold_data(const k_fold_data&);
         k_fold_data& operator=(const k_fold_data&);
 
         template<typename It>
- void initialize(int_ k, It b_xy,It e_xy); // j = 0
+ void initialize(int_ k, It b,It e); // j = 0
         void initialize(); //restores state to j = 0
         void increment(); // ++j
 
@@ -104,9 +109,9 @@
     
     template<typename T>
         template<typename It>
- k_fold_data<T>::k_fold_data(int_ k, It b_xy,It e_xy)
+ k_fold_data<T>::k_fold_data(int_ k, It b,It e)
     {
- this->initialize(k,b_xy,e_xy);
+ this->initialize(k,b,e);
     }
     
     template<typename T>
@@ -127,32 +132,32 @@
 
     template<typename T>
         template<typename It>
- void k_fold_data<T>::initialize(int_ k,It b_xy,It e_xy)
+ void k_fold_data<T>::initialize(int_ k,It b,It e)
     {
         BOOST_ASSERT(k>1);
         typedef typename iterator_difference<It>::type diff_;
         this->k_ = k;
         this->j_ = -1;
- diff_ d = std::distance( b_xy, e_xy);
+ diff_ d = std::distance( b, e);
         if(d % this->k() != 0){
             static const str_ msg
- = str_("k_fold_estimate : distance(b_xy,e_xy)")
+ = str_("k_fold_estimate : distance(b,e)")
                     + "%1% not a multiple of k = %2%";
             throw std::runtime_error( ( format(msg) % d % k ).str() );
         }
         this->n_ = d / this->k();
 
- It i = boost::next( b_xy, this->n() * (k-1) );
+ It i = boost::next( b, this->n() * (k-1) );
         this->test_data_.clear();
         this->test_data_.reserve(this->n());
         std::copy(
             i,
- e_xy,
+ e,
             std::back_inserter(this->test_data_)
         );
         this->train_data_.assign(
             this->n() * (k-1),
- b_xy,
+ b,
             i
         );
         this->increment();
@@ -228,8 +233,8 @@
         return this->k_;
     }
     
-
 }// goodness_of_fit
+}// detail
 }// statistics
 }// boost
 

Modified: sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/doc/readme.txt
==============================================================================
--- sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/doc/readme.txt (original)
+++ sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/doc/readme.txt 2009-09-29 19:10:44 EDT (Tue, 29 Sep 2009)
@@ -13,7 +13,17 @@
 
 [ Overview ]
 
-C++ tools for cross validation.
+C++ tools for cross validation.
+
+The data structure k_fold_data keeps a dataset partitioned into a training
+subset and a test subset. The member function increment() advances to the
+next of the K partitions. The implementation relies on a circular buffer.
+
+The class k_fold_estimate automates the task of producing, with a
+client-supplied estimator (e), an estimate for each element of the test
+sample. A derived class takes care of extracting, from a test data point t,
+the argument (a) that is passed to (e). For example, if (e) is a regression
+estimator, t = (x,y), so a = x.
 
 [ Compiler ]
 


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk