Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r55820 - in sandbox/statistics/goodness_of_fit: . boost boost/statistics boost/statistics/goodness_of_fit boost/statistics/goodness_of_fit/algorithm boost/statistics/goodness_of_fit/data boost/statistics/goodness_of_fit/functional libs libs/statistics libs/statistics/cross_validation libs/statistics/cross_validation/doc libs/statistics/cross_validation/example libs/statistics/cross_validation/src libs/statistics/goodness_of_fit libs/statistics/goodness_of_fit/doc libs/statistics/goodness_of_fit/example libs/statistics/goodness_of_fit/src
From: erwann.rogard_at_[hidden]
Date: 2009-08-27 20:50:14


Author: e_r
Date: 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
New Revision: 55820
URL: http://svn.boost.org/trac/boost/changeset/55820

Log:
adding dir gof
Added:
   sandbox/statistics/goodness_of_fit/
   sandbox/statistics/goodness_of_fit/boost/
   sandbox/statistics/goodness_of_fit/boost/statistics/
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/include.hpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/k_fold_estimate.hpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/k_fold_marginal_estimate.hpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/k_fold_regression_estimate.hpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/mean_abs_error.hpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/sqrt_mse.hpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/data/
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/data/include.hpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/data/k_fold_data.hpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/functional/
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/functional/call_marginal_estimate.hpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/functional/call_regression_estimate.hpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/functional/include.hpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/include.hpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/libs/
   sandbox/statistics/goodness_of_fit/libs/statistics/
   sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/
   sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/doc/
   sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/doc/readme.txt (contents, props changed)
   sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/example/
   sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/example/average.hpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/example/k_fold.cpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/example/k_fold.h (contents, props changed)
   sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/src/
   sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/src/main.cpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/
   sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/doc/
   sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/doc/readme.txt (contents, props changed)
   sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/example/
   sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/example/average.hpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/example/k_fold.cpp (contents, props changed)
   sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/example/k_fold.h (contents, props changed)
   sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/src/
   sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/src/main.cpp (contents, props changed)

Added: sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/include.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/include.hpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,18 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::algorithm::include.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_GOODNESS_OF_FIT_ALGORITHM_INCLUDE_HPP_ER_2009
+#define BOOST_STATISTICS_GOODNESS_OF_FIT_ALGORITHM_INCLUDE_HPP_ER_2009
+
+#include <boost/statistics/goodness_of_fit/algorithm/k_fold_estimate.hpp>
+#include <boost/statistics/goodness_of_fit/algorithm/k_fold_marginal_estimate.hpp>
+#include <boost/statistics/goodness_of_fit/algorithm/k_fold_regression_estimate.hpp>
+#include <boost/statistics/goodness_of_fit/algorithm/mean_abs_error.hpp>
+#include <boost/statistics/goodness_of_fit/algorithm/sqrt_mse.hpp>
+
+#endif
\ No newline at end of file

Added: sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/k_fold_estimate.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/k_fold_estimate.hpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,65 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::algorithm::k_fold_estimate.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_GOODNESS_OF_FIT_ALGORITHM_K_FOLD_ESTIMATE_HPP_ER_2009
+#define BOOST_STATISTICS_GOODNESS_OF_FIT_ALGORITHM_K_FOLD_ESTIMATE_HPP_ER_2009
+#include <string>
+#include <stdexcept>
+#include <boost/concept_check.hpp>
+#include <boost/utility.hpp>
+#include <boost/foreach.hpp>
+#include <boost/iterator/iterator_traits.hpp>
+#include <boost/statistics/estimator_concept/trainable_estimator/concept.hpp>
+#include <boost/statistics/goodness_of_fit/data/k_fold_data.hpp>
+
+namespace boost{
+namespace statistics{
+namespace goodness_of_fit{
+
+ // Runs the k-fold loop held by `kfd` for the given estimator.
+ //
+ // While kfd.j() < kfd.k(): the estimator is trained on kfd.train_data(),
+ // then a default-constructed functor C is called once per element t of
+ // kfd.test_data() as C()(estimator, t, *i), with i incremented after each
+ // call; finally kfd.increment() moves to the next fold.
+ //
+ // C  : default-constructible functor (e.g. call_marginal_estimate)
+ // E  : must model estimator_concept::TrainableEstimator for the training
+ //      range type; it is taken by value, so the caller's object is untouched
+ // It : iterator passed dereferenced as the third argument of C
+ template<typename C,typename T,typename E,typename It>
+ void estimate(
+     k_fold_data<T>& kfd,
+     E estimator,
+     It i
+ ){
+     typedef typename k_fold_data<T>::range_train_data_type train_range_;
+     BOOST_CONCEPT_ASSERT((
+         estimator_concept::TrainableEstimator<E,train_range_>
+     ));
+
+     C call;
+     for( ; kfd.j() < kfd.k(); kfd.increment() ){
+         train_range_ train_range(
+             boost::begin( kfd.train_data() ),
+             boost::end( kfd.train_data() )
+         );
+         train( estimator, train_range );
+
+         BOOST_FOREACH(const T& t, kfd.test_data()){
+             call( estimator, t, *i );
+             ++i;
+         }
+     }
+ }
+
+}// goodness_of_fit
+}// statistics
+}// boost
+
+#endif

Added: sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/k_fold_marginal_estimate.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/k_fold_marginal_estimate.hpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,37 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::algorithm::k_fold_marginal_estimate.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_GOODNESS_OF_FIT_ALGORITHM_K_FOLD_MARGINAL_ESTIMATE_HPP_ER_2009
+#define BOOST_STATISTICS_GOODNESS_OF_FIT_ALGORITHM_K_FOLD_MARGINAL_ESTIMATE_HPP_ER_2009
+#include <boost/statistics/goodness_of_fit/functional/call_marginal_estimate.hpp>
+#include <boost/statistics/goodness_of_fit/algorithm/k_fold_estimate.hpp>
+
+namespace boost{
+namespace statistics{
+namespace goodness_of_fit{
+
+ // k-fold cross validation for a marginal estimator: forwards to
+ // estimate<call_marginal_estimate>, which receives each test element
+ // together with the dereferenced output iterator.
+ template<typename T,typename E,typename It>
+ void marginal_estimate(
+     k_fold_data<T>& kfd, // T is a variable, such as Y or X, but not (X,Y)
+     E estimator,
+     It i
+ ){
+     estimate<call_marginal_estimate>( kfd, estimator, i );
+ }
+
+}// goodness_of_fit
+}// statistics
+}// boost
+
+#endif
\ No newline at end of file

Added: sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/k_fold_regression_estimate.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/k_fold_regression_estimate.hpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,37 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::algorithm::k_fold_regression_estimate.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_GOODNESS_OF_FIT_ALGORITHM_K_FOLD_REGRESSION_ESTIMATE_HPP_ER_2009
+#define BOOST_STATISTICS_GOODNESS_OF_FIT_ALGORITHM_K_FOLD_REGRESSION_ESTIMATE_HPP_ER_2009
+#include <boost/statistics/goodness_of_fit/functional/call_regression_estimate.hpp>
+#include <boost/statistics/goodness_of_fit/algorithm/k_fold_estimate.hpp>
+
+namespace boost{
+namespace statistics{
+namespace goodness_of_fit{
+
+ // k-fold cross validation for a regression estimator: forwards to
+ // estimate<call_regression_estimate>, which receives each test tuple
+ // together with the dereferenced output iterator.
+ template<typename T,typename E,typename It>
+ void regression_estimate(
+     k_fold_data<T>& kfd, // T is a (x,y) tuple
+     E estimator,
+     It i
+ ){
+     estimate<call_regression_estimate>( kfd, estimator, i );
+ }
+
+}// goodness_of_fit
+}// statistics
+}// boost
+
+#endif
\ No newline at end of file

Added: sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/mean_abs_error.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/mean_abs_error.hpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,54 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::algorithm::mean_abs_error.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_GOODNESS_OF_FIT_ALGORITHM_MEAN_ABS_ERROR_HPP_ER_2009
+#define BOOST_STATISTICS_GOODNESS_OF_FIT_ALGORITHM_MEAN_ABS_ERROR_HPP_ER_2009
+#include <cmath>
+#include <boost/iterator/iterator_traits.hpp>
+#include <boost/range.hpp>
+#include <boost/vector_space/functional/l1_distance.hpp>
+
+namespace boost{
+namespace statistics{
+namespace goodness_of_fit{
+
+ template<typename It,typename It1>
+ typename iterator_value<It>::type
+ mean_abs_error(
+ It b,
+ It e,
+ It1 b1
+ ){
+
+ typedef iterator_range<It> range_;
+ typedef typename iterator_difference<It>::type diff_;
+ typedef iterator_range<It1> range1_;
+ typedef typename iterator_value<It>::type val_;
+ typedef vector_space::template l1_distance<range_> l1_;
+
+ diff_ d = std::distance(b,e);
+
+ l1_ l1(range_(b,e));
+ range1_ range1(
+ b1,
+ boost::next(
+ b1,
+ d
+ )
+ );
+ val_ res = l1(range1);
+ res /= static_cast<val_>(d);
+ res = sqrt( res );
+ return res;
+ };
+
+}// goodness_of_fit
+}// statistics
+}// boost
+
+#endif
\ No newline at end of file

Added: sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/sqrt_mse.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/algorithm/sqrt_mse.hpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,54 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::algorithm::sqrt_mse.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_GOODNESS_OF_FIT_ALGORITHM_SQRT_MSE_HPP_ER_2009
+#define BOOST_STATISTICS_GOODNESS_OF_FIT_ALGORITHM_SQRT_MSE_HPP_ER_2009
+#include <cmath>
+#include <boost/iterator/iterator_traits.hpp>
+#include <boost/range.hpp>
+#include <boost/vector_space/functional/l2_distance_squared.hpp>
+
+namespace boost{
+namespace statistics{
+namespace goodness_of_fit{
+
+ template<typename It,typename It1>
+ typename iterator_value<It>::type
+ sqrt_mse(
+ It b,
+ It e,
+ It1 b1
+ ){
+
+ typedef iterator_range<It> range_;
+ typedef typename iterator_difference<It>::type diff_;
+ typedef iterator_range<It1> range1_;
+ typedef typename iterator_value<It>::type val_;
+ typedef vector_space::l2_distance_squared<range_> l2_;
+
+ diff_ d = std::distance(b,e);
+
+ l2_ l2(range_(b,e));
+ range1_ range1(
+ b1,
+ boost::next(
+ b1,
+ d
+ )
+ );
+ val_ res = l2(range1);
+ res /= static_cast<val_>(d);
+ res = sqrt( res );
+ return res;
+ };
+
+}// goodness_of_fit
+}// statistics
+}// boost
+
+#endif
\ No newline at end of file

Added: sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/data/include.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/data/include.hpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,15 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::data::include.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_GOODNESS_OF_FIT_DATA_INCLUDE_HPP_ER_2009
+#define BOOST_STATISTICS_GOODNESS_OF_FIT_DATA_INCLUDE_HPP_ER_2009
+
+#include <boost/statistics/goodness_of_fit/data/k_fold_data.hpp>
+
+#endif
+

Added: sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/data/k_fold_data.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/data/k_fold_data.hpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,236 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::data::k_fold_data.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_GOODNESS_OF_FIT_DATA_K_FOLD_DATA_HPP_ER_2009
+#define BOOST_STATISTICS_GOODNESS_OF_FIT_DATA_K_FOLD_DATA_HPP_ER_2009
+#include <stdexcept>
+#include <iterator>
+#include <vector>
+#include <ostream>
+#include <boost/mpl/assert.hpp>
+#include <boost/mpl/bool.hpp>
+#include <boost/range.hpp>
+#include <boost/format.hpp>
+#include <boost/utility.hpp>
+#include <boost/circular_buffer.hpp>
+#include <boost/iterator/iterator_traits.hpp>
+
+#include <boost/binary_op/meta/include.hpp>
+
+namespace boost{
+namespace statistics{
+namespace goodness_of_fit{
+
+ // k = 4
+ // train test
+ // [0][1][2] (3) j = 3
+ // -------------
+ // [1][2][3] (0) j = 0
+ // [2][3][0] (1) j = 1
+ // [3][0][1] (2) j = 2
+ // [0][1][2] (3) j = 3
+
+ // If regression, T == tuple
+ // If marginal, T == Y
+ // Data set partitioned for k-fold cross validation.
+ //
+ // Keeps the current training data in a circular buffer and the current test
+ // data in a vector, together with the counters k (number of iterations),
+ // j (index of the current iteration) and n (size of the test data).
+ template<typename T>
+ class k_fold_data{
+ // Compile-time checks on T. A non-tuple T always passes (mpl::bool_<true>).
+ // NOTE(review): presumably these reject tuples holding reference or const
+ // elements - confirm against binary_op.
+ BOOST_MPL_ASSERT((
+ mpl::eval_if<
+ binary_op::is_tuple<T>,
+ binary_op::tuple_has_no_ref<T>,
+ mpl::bool_<true>
+ >
+ ));
+ BOOST_MPL_ASSERT((
+ mpl::eval_if<
+ binary_op::is_tuple<T>,
+ binary_op::tuple_has_no_const<T>,
+ mpl::bool_<true>
+ >
+ ));
+
+ typedef std::string str_;
+ public:
+ typedef boost::circular_buffer<T> train_data_type;
+ typedef std::vector<T> test_data_type;
+ typedef long int int_;
+ // Range type over the training data; used by the k-fold estimate algorithm
+ // as the range handed to train().
+ typedef boost::sub_range<const train_data_type> range_train_data_type;
+
+ // Empty data set, all counters zero.
+ k_fold_data();
+ // Equivalent to default state followed by initialize(k,b_xy,e_xy).
+ template<typename It>
+ k_fold_data(int_ k, It b_xy,It e_xy);
+ k_fold_data(const k_fold_data&);
+ k_fold_data& operator=(const k_fold_data&);
+
+ // Throws std::runtime_error if distance(b_xy,e_xy) is not a multiple of k.
+ template<typename It>
+ void initialize(int_ k, It b_xy,It e_xy); // j = 0
+ void initialize(); //restores state to j = 0
+ // Throws std::runtime_error if j is not smaller than k.
+ void increment(); // ++j
+
+ // Access
+ const train_data_type& train_data()const;
+ const test_data_type& test_data()const;
+
+ const int_& n()const; // size of test data
+ const int_& j()const; // index of current iteration
+ const int_& k()const; // number of iterations
+
+ // The access specifier below is commented out, so the data members are
+ // public.
+ //private:
+ int_ k_;
+ int_ j_;
+ int_ n_;
+ train_data_type train_data_;
+ test_data_type test_data_;
+ };
+
+ // Writes `that` as "k_fold_data(n,j,k)": size of the test data, index of
+ // the current iteration and number of iterations, in that order.
+ template<typename T>
+ std::ostream& operator<<(std::ostream& out,const k_fold_data<T>& that){
+     return out << (
+         format("k_fold_data(%1%,%2%,%3%)") % that.n() % that.j() % that.k()
+     ).str();
+ }
+
+
+ // Implementation //
+
+ // Default constructor: no data, k = j = n = 0.
+ template<typename T>
+ k_fold_data<T>::k_fold_data()
+     : k_(0),
+       j_(0),
+       n_(0),
+       train_data_(),
+       test_data_()
+ {}
+
+ // Constructs from the data set [b_xy,e_xy) split into k folds; all the
+ // work, including error reporting, is done by initialize(k,b_xy,e_xy).
+ template<typename T>
+ template<typename It>
+ k_fold_data<T>::k_fold_data(int_ k, It b_xy,It e_xy)
+ {
+     initialize( k, b_xy, e_xy );
+ }
+
+ // Copy constructor: member-wise copy of the counters and both data sets.
+ template<typename T>
+ k_fold_data<T>::k_fold_data(const k_fold_data& that)
+     : k_( that.k_ ),
+       j_( that.j_ ),
+       n_( that.n_ ),
+       train_data_( that.train_data_ ),
+       test_data_( that.test_data_ )
+ {}
+
+ // Copy assignment: member-wise copy of the counters and both data sets.
+ // Returns *this so that assignments can be chained; flowing off the end of
+ // a function declared to return a reference is undefined behaviour.
+ template<typename T>
+ k_fold_data<T>& k_fold_data<T>::operator=(const k_fold_data& that)
+ {
+     this->k_ = that.k_;
+     this->j_ = that.j_;
+     this->n_ = that.n_;
+     this->train_data_ = that.train_data_;
+     this->test_data_ = that.test_data_;
+     return *this;
+ }
+
+ // (Re)loads the data set [b_xy,e_xy) and splits it into k parts of equal
+ // size n = distance(b_xy,e_xy) / k.
+ //
+ // The last n elements become the initial test data and the preceding
+ // n*(k-1) elements the training data; one increment() is then applied,
+ // which brings j from -1 to 0.
+ //
+ // Precondition: k > 1 (checked with BOOST_ASSERT).
+ // Throws std::runtime_error if the number of elements is not a multiple of
+ // k. That check runs before any member is written, so a call rejected this
+ // way leaves the object as it was.
+ template<typename T>
+ template<typename It>
+ void k_fold_data<T>::initialize(int_ k,It b_xy,It e_xy)
+ {
+     BOOST_ASSERT(k>1);
+     typedef typename iterator_difference<It>::type diff_;
+     const diff_ d = std::distance( b_xy, e_xy);
+     if(d % k != 0){
+         static const str_ msg
+             = str_("k_fold_data : distance(b_xy,e_xy) = ")
+             + "%1% is not a multiple of k = %2%";
+         throw std::runtime_error( ( format(msg) % d % k ).str() );
+     }
+     this->k_ = k;
+     this->j_ = -1; // the increment() at the end brings j to 0
+     this->n_ = d / k;
+
+     It i = boost::next( b_xy, this->n() * (k-1) );
+     this->test_data_.clear();
+     this->test_data_.reserve(this->n());
+     std::copy(
+         i,
+         e_xy,
+         std::back_inserter(this->test_data_)
+     );
+     this->train_data_.assign(
+         this->n() * (k-1),
+         b_xy,
+         i
+     );
+     this->increment();
+ }
+
+ // Rewinds to the j = 0 state: increment() is applied until j reaches k,
+ // then the index is reset to 0.
+ // NOTE(review): relies on the train/test rotation being cyclic with period
+ // k, as sketched in the table above the class - confirm.
+ template<typename T>
+ void k_fold_data<T>::initialize()
+ {
+     for( ; this->j() < this->k(); this->increment() ){}
+     this->j_ = 0;
+ }
+
+ // Moves to the next iteration (++j).
+ //
+ // The first n elements of the training data become the new test data, and
+ // the previous test data is inserted at the end of the training buffer.
+ // NOTE(review): presumably the circular buffer is full at this point, so
+ // the insertion drops those first n elements - confirm against
+ // boost::circular_buffer::insert.
+ //
+ // Throws std::runtime_error if j is not smaller than k on entry.
+ template<typename T>
+ void k_fold_data<T>::increment(){
+     if( !( this->j()<this->k() ) )
+     {
+         static const char* msg = "k_fold_data: !j<k=%1%";
+         throw std::runtime_error(
+             ( format( msg ) % this->k() ).str()
+         );
+     }
+     // A local buffer instead of a function-level static: no state is shared
+     // between objects, so distinct objects can be advanced independently.
+     test_data_type next_test(
+         boost::begin( this->train_data() ),
+         boost::next( boost::begin( this->train_data() ), this->n() )
+     );
+     this->train_data_.insert(
+         boost::end( this->train_data_ ),
+         boost::begin( this->test_data() ),
+         boost::end( this->test_data() )
+     );
+     this->test_data_.swap( next_test );
+     ++this->j_;
+ }
+
+ // Access
+ // Read-only view of the current training data (the circular buffer).
+ template<typename T>
+ const typename k_fold_data<T>::train_data_type&
+ k_fold_data<T>::train_data()const
+ {
+ return this->train_data_;
+ }
+
+ // Read-only view of the current test data.
+ template<typename T>
+ const typename k_fold_data<T>::test_data_type&
+ k_fold_data<T>::test_data()const
+ {
+ return this->test_data_;
+ }
+
+ // Size of the test data, as set by initialize(k,b_xy,e_xy).
+ template<typename T>
+ const typename k_fold_data<T>::int_&
+ k_fold_data<T>::n()const
+ {
+ return this->n_;
+ }
+
+ // Index of the current iteration; advanced by increment().
+ template<typename T>
+ const typename k_fold_data<T>::int_&
+ k_fold_data<T>::j()const
+ {
+ return this->j_;
+ }
+
+ // Number of iterations, i.e. the k passed to initialize(k,b_xy,e_xy).
+ template<typename T>
+ const typename k_fold_data<T>::int_&
+ k_fold_data<T>::k()const
+ {
+ return this->k_;
+ }
+
+
+}// goodness_of_fit
+}// statistics
+}// boost
+
+#endif
\ No newline at end of file

Added: sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/functional/call_marginal_estimate.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/functional/call_marginal_estimate.hpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,31 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::functional::call_marginal_estimate.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_GOODNESS_OF_FIT_FUNCTIONAL_CALL_MARGINAL_ESTIMATE_HPP_ER_2009
+#define BOOST_STATISTICS_GOODNESS_OF_FIT_FUNCTIONAL_CALL_MARGINAL_ESTIMATE_HPP_ER_2009
+
+namespace boost{
+namespace statistics{
+namespace goodness_of_fit{
+
+ // Functor plugged into the k-fold estimate algorithm for marginal
+ // estimators: assigns statistics::estimate(e,x) to x1.
+ struct call_marginal_estimate{
+
+     call_marginal_estimate(){}
+
+     template<typename Estimator,typename In,typename Out>
+     void operator()(const Estimator& e,const In& x,Out& x1)const{
+         x1 = statistics::estimate( e, x );
+     }
+
+ };
+
+}// goodness_of_fit
+}// statistics
+}// boost
+
+#endif

Added: sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/functional/call_regression_estimate.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/functional/call_regression_estimate.hpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,37 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::functional::call_regression_estimate.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_GOODNESS_OF_FIT_FUNCTIONAL_CALL_REGRESSION_ESTIMATE_HPP_ER_2009
+#define BOOST_STATISTICS_GOODNESS_OF_FIT_FUNCTIONAL_CALL_REGRESSION_ESTIMATE_HPP_ER_2009
+
+namespace boost{
+namespace statistics{
+namespace goodness_of_fit{
+
+ // Functor plugged into the k-fold estimate algorithm for regression
+ // estimators: assigns to y the estimate evaluated at element 0 of the
+ // tuple xy.
+ struct call_regression_estimate{
+
+     call_regression_estimate(){}
+
+     template<typename Estimator,typename Tuple,typename Out>
+     void operator()(const Estimator& e,const Tuple& xy,Out& y)const{
+         y = statistics::estimate( e, boost::get<0>(xy) );
+     }
+
+ };
+
+}// goodness_of_fit
+}// statistics
+}// boost
+
+#endif
+
+
+

Added: sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/functional/include.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/functional/include.hpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,15 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::functional::include.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_GOODNESS_OF_FIT_FUNCTIONAL_INCLUDE_HPP_ER_2009
+#define BOOST_STATISTICS_GOODNESS_OF_FIT_FUNCTIONAL_INCLUDE_HPP_ER_2009
+
+#include <boost/statistics/goodness_of_fit/functional/call_marginal_estimate.hpp>
+#include <boost/statistics/goodness_of_fit/functional/call_regression_estimate.hpp>
+
+#endif
\ No newline at end of file

Added: sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/include.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/boost/statistics/goodness_of_fit/include.hpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,16 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::include.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_GOODNESS_OF_FIT_INCLUDE_HPP_ER_2009
+#define BOOST_STATISTICS_GOODNESS_OF_FIT_INCLUDE_HPP_ER_2009
+
+#include <boost/statistics/goodness_of_fit/algorithm/include.hpp>
+#include <boost/statistics/goodness_of_fit/data/include.hpp>
+#include <boost/statistics/goodness_of_fit/functional/include.hpp>
+
+#endif
\ No newline at end of file

Added: sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/doc/readme.txt
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/doc/readme.txt 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,47 @@
+//////////////////////////////////////////////////////////////////////////////
+// goodness_of_fit::doc::readme //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+
+[ Contact ]
+
+erwann.rogard_at_[hidden]
+
+[ Overview ]
+
+C++ tools for cross validation.
+
+[ Compiler ]
+
+gcc version i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1
+
+[ Dependencies ]
+
+/usr/local/boost_1_39_0/
+/sandbox/statistics/functional/
+/sandbox/statistics/estimator_concept/
+/sandbox/statistics/mpl/
+/sandbox/statistics/binary_op/
+/sandbox/statistics/random/
+/sandbox/statistics/arithmetic/
+/sandbox/statistics/scalar_dist/
+/sandbox/statistics/standard_distribution/
+/sandbox/statistics/estimator_concept/
+/sandbox/statistics/dist_random/
+
+[ History ]
+
+August 2009 - Creation
+
+[ TODO ]
+
+Currently, k_fold_data makes a copy of the whole dataset. When join(vec0,vec1)
+is available, we may be able to avoid duplicate data.
+
+[ Sources ]
+
+http://en.wikipedia.org/wiki/Cross-validation_%28statistics%29

Added: sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/example/average.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/example/average.hpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,72 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::example::average.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef LIBS_STATISTICS_GOODNESS_OF_FIT_EXAMPLE_AVERAGE_H_ER_2009
+#define LIBS_STATISTICS_GOODNESS_OF_FIT_EXAMPLE_AVERAGE_H_ER_2009
+#include <vector>
+#include <boost/tuple/tuple.hpp>
+#include <boost/functional/mean_var_accumulator.hpp>
+#include <boost/binary_op/meta/remove_ref_cv.hpp>
+
+namespace boost{
+namespace statistics{
+namespace goodness_of_fit{
+namespace detail{
+
+ // For use by examples, not part of the library.
+ // Example estimator that stores a single value y of the type of element 1
+ // of the tuple T (after tuple_remove_ref_cv is applied to T).
+ // train() overwrites y; estimate() returns it.
+ template<typename T>
+ struct average{
+ typedef typename binary_op::tuple_remove_ref_cv<T>::type tuple_;
+ typedef typename tuples::element<1,tuple_>::type y_;
+ // Accumulator type used by train() to compute the mean of the y values.
+ typedef typename
+ boost::functional::mean_var_accumulator<y_>::type mean_var_acc_;
+ y_ y;
+ };
+
+}// detail
+}// goodness_of_fit
+
+ // Trains the example estimator `e` on `data`.
+ //
+ // binary_op::tails fills a vector of y_ from the range `data`, the values
+ // are fed to a mean_var accumulator, and the accumulator's mean is stored
+ // in e.y.
+ // NOTE(review): presumably tails extracts the second component (y) of each
+ // tuple - confirm against binary_op.
+ template<typename T,typename R>
+ void train(
+     goodness_of_fit::detail::average<T>& e,
+     const R& data
+ ){
+     typedef goodness_of_fit::detail::average<T> average_;
+     typedef typename average_::y_ y_;
+     typedef typename average_::mean_var_acc_ acc_;
+
+     std::vector<y_> ys;
+     binary_op::tails(
+         boost::begin(data),
+         boost::end(data),
+         std::back_inserter(ys)
+     );
+
+     const acc_ filled = std::for_each(
+         boost::begin(ys),
+         boost::end(ys),
+         acc_()
+     );
+     e.y = boost::accumulators::mean( filled );
+ }
+
+ // Estimate produced by the example estimator: returns the stored value e.y.
+ // The argument x is not used.
+ template<typename T,typename X>
+ typename goodness_of_fit::detail::average<T>::y_
+ estimate(
+ const goodness_of_fit::detail::average<T>& e,
+ const X& x
+ ){
+ return e.y;
+ }
+
+}// statistics
+}// boost
+
+#endif
\ No newline at end of file

Added: sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/example/k_fold.cpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/example/k_fold.cpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,132 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::example::k_fold.cpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#include <algorithm>
+#include <iterator>
+#include <boost/tuple/tuple.hpp>
+#include <boost/foreach.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/format.hpp>
+#include <boost/arithmetic/equal.hpp>
+#include <boost/mpl/nested_type.hpp>
+#include <boost/binary_op/include.hpp>
+#include <boost/dist_random/distributions/normal.hpp>
+#include <boost/standard_distribution/distributions/normal.hpp>
+#include <boost/dist_random/random/include.hpp>
+#include <boost/statistics/goodness_of_fit/data/k_fold_data.hpp>
+#include <boost/statistics/goodness_of_fit/algorithm/k_fold_estimate.hpp>
+#include <boost/statistics/goodness_of_fit/algorithm/sqrt_mse.hpp>
+#include <boost/statistics/goodness_of_fit/algorithm/mean_abs_error.hpp>
+
+#include <libs/statistics/goodness_of_fit/example/average.hpp>
+#include <libs/statistics/goodness_of_fit/example/k_fold.h>
+
+// Example driver for k-fold data: builds a small sample, checks that the test
+// folds visited in sequence reproduce the sample in order, then computes
+// k-fold estimates with the example `average` estimator and writes
+// sqrt_mse, mean_abs_error and the estimates to out.
+void example_k_fold(std::ostream& out)
+{
+
+ using namespace boost;
+ namespace stat = boost::statistics;
+ namespace cv = stat::goodness_of_fit;
+
+ typedef mt19937 urng_;
+ typedef double val_;
+ typedef std::vector<val_> vals_;
+ typedef math::normal_distribution<val_> dist_;
+ // Range of (x,y) tuples built over two vectors held by const reference.
+ typedef binary_op::tuple_range<const vals_&,const vals_&>
+ meta_tuple_range_;
+ typedef meta_tuple_range_::type tuple_range_;
+ typedef range_value<tuple_range_>::type tuple_ref_cv_;
+ typedef binary_op::tuple_remove_ref_cv<
+ tuple_ref_cv_
+ >::type tuple_;
+
+ typedef cv::detail::average<tuple_> estimator_;
+ typedef cv::k_fold_data<tuple_> k_fold_data_;
+ typedef k_fold_data_::test_data_type test_data_;
+
+ // n = 10 sample points, k = 5 folds (written as floating-point literals).
+ const unsigned n = 1e1;
+ const unsigned k = 5e0;
+ // presumably k_fold_data needs equally sized folds - confirm.
+ BOOST_ASSERT(n % k == 0);
+
+ vals_ vals;
+ dist_ dist;
+ urng_ urng;
+
+ // Not std::generate_n (four arguments); presumably the dist_random
+ // overload that appends n draws from dist using urng - confirm.
+ generate_n(
+ std::back_inserter(vals), n,dist, urng
+ );
+
+ // The same vector supplies both tuple components, so x == y for every tuple.
+ tuple_range_ tuples = meta_tuple_range_::make(
+ vals,
+ vals
+ );
+
+
+ k_fold_data_ k_fold_data(
+ k,
+ boost::begin(tuples),
+ boost::end(tuples)
+ );
+
+ out << k_fold_data;
+
+ // Two passes: the second one runs after initialize() and repeats the same
+ // checks on the re-initialized object.
+ for(unsigned i = 0; i<2; i++){
+ if(i>0){ k_fold_data.initialize(); }
+
+ typedef range_iterator<tuple_range_>::type tuples_it_;
+ tuples_it_ tuples_it = boost::begin( tuples );
+ // Walk the folds; tuples_it advances in step with the test elements, so
+ // the assertions compare each test element with the next original tuple.
+ while(k_fold_data.j()<k_fold_data.k()){
+ typedef range_value<test_data_>::type t_;
+ BOOST_FOREACH(const t_& t,k_fold_data.test_data())
+ {
+// out << std::endl;
+ val_ x = get<0>(t);
+ val_ x1 = get<0>(*tuples_it);
+ val_ y = get<1>(t);
+ val_ y1 = get<1>(*tuples_it);
+// out << '(' << x << ',' << y << ')';
+// out << '(' << x1 << ',' << y1 << ')';
+ // BOOST_ASSERT compiles to nothing when NDEBUG is defined, so these
+ // checks only run in debug builds.
+ BOOST_ASSERT( arithmetic_tools::equal(x,x1) );
+ BOOST_ASSERT( arithmetic_tools::equal(y,y1) );
+ ++tuples_it;
+ };
+ k_fold_data.increment();
+ }
+ out << std::endl << k_fold_data;
+ }
+
+ // One output slot per tuple; initialize() is called again before estimating.
+ vals_ ests( size(tuples) );
+ k_fold_data.initialize();
+ out << std::endl;
+ stat::estimate(
+ k_fold_data,
+ estimator_(),
+ boost::begin(ests)
+ );
+
+ // Error measures of the estimates against the original values.
+ val_ sqrt_mse = statistics::goodness_of_fit::sqrt_mse(
+ boost::begin(ests),
+ boost::end(ests),
+ boost::begin(vals)
+ );
+ val_ mae = statistics::goodness_of_fit::mean_abs_error(
+ boost::begin(ests),
+ boost::end(ests),
+ boost::begin(vals)
+ );
+
+ out << ( format("sqrt_mse = %1%")%sqrt_mse ).str() << std::endl;
+ out << ( format("mae = %1%")%mae ).str() << std::endl;
+
+ out << "k-fold estimates :";
+ std::copy(
+ boost::begin(ests),
+ boost::end(ests),
+ std::ostream_iterator<val_>(out," ")
+ );
+}

Added: sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/example/k_fold.h
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/example/k_fold.h 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,15 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::example::k_fold.h //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef LIBS_STATISTICS_GOODNESS_OF_FIT_EXAMPLE_K_FOLD_H_ER_2009
+#define LIBS_STATISTICS_GOODNESS_OF_FIT_EXAMPLE_K_FOLD_H_ER_2009
+#include <ostream>
+
+// Runs the k-fold example and writes its output to the given stream.
+void example_k_fold(std::ostream&);
+
+#endif
\ No newline at end of file

Added: sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/src/main.cpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/libs/statistics/cross_validation/src/main.cpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,18 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::src::main.cpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#include <iostream>
+#include <libs/statistics/goodness_of_fit/example/k_fold.h>
+
+// Program entry point: runs the k-fold example with std::cout as its output
+// stream and returns 0.
+int main()
+{
+
+ example_k_fold(std::cout);
+
+ return 0;
+}
\ No newline at end of file

Added: sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/doc/readme.txt
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/doc/readme.txt 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,49 @@
+//////////////////////////////////////////////////////////////////////////////
+// goodness_of_fit::doc::readme //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+
+[ Contact ]
+
+erwann.rogard_at_[hidden]
+
+[ Overview ]
+
+C++ tools for cross validation.
+
+[ Compiler ]
+
+gcc version i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1
+
+[ Dependencies ]
+
+/usr/local/boost_1_39_0/
+/sandbox/statistics/vector_space/
+/sandbox/statistics/functional/
+/sandbox/statistics/estimator_concept/
+/sandbox/statistics/mpl/
+/sandbox/statistics/binary_op/
+/sandbox/statistics/random/
+/sandbox/statistics/arithmetic/
+/sandbox/statistics/scalar_dist/
+/sandbox/statistics/standard_distribution/
+/sandbox/statistics/estimator_concept/
+/sandbox/statistics/dist_random/
+
+[ History ]
+
+August 2009 - Creation
+
+[ TODO ]
+
+Currently, k_fold_data makes a copy of the whole dataset and splits it between
+a train and a test dataset. If/when we have a join_view(vec0,vec1) we may be
+able to avoid duplicating the data.
+
+[ Sources ]
+
+http://en.wikipedia.org/wiki/Cross-validation_%28statistics%29

Added: sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/example/average.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/example/average.hpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,73 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::example::average.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef LIBS_STATISTICS_GOODNESS_OF_FIT_EXAMPLE_AVERAGE_H_ER_2009
+#define LIBS_STATISTICS_GOODNESS_OF_FIT_EXAMPLE_AVERAGE_H_ER_2009
+#include <vector>
+#include <boost/tuple/tuple.hpp>
+#include <boost/functional/mean_var_accumulator.hpp>
+#include <boost/binary_op/meta/remove_ref_cv.hpp>
+
+namespace boost{
+namespace statistics{
+
+namespace goodness_of_fit{
+namespace detail{
+
+ // For use by examples, not part of the library.
+ // Minimal estimator holding a single value y. In this header, train() sets
+ // y to a mean computed from the data and estimate() returns y unchanged.
+ // T is a tuple type (possibly reference/cv-qualified) that must have an
+ // element at index 1.
+ template<typename T>
+ struct average{
+ // T passed through binary_op::tuple_remove_ref_cv (presumably strips
+ // references and cv-qualifiers - confirm against binary_op).
+ typedef typename binary_op::tuple_remove_ref_cv<T>::type tuple_;
+ // Type of the tuple element at index 1.
+ typedef typename tuples::element<1,tuple_>::type y_;
+ // Accumulator type that train() feeds with y_ values to obtain the mean.
+ typedef typename
+ boost::functional::mean_var_accumulator<y_>::type mean_var_acc_;
+ // Current estimate. No constructor is declared, so y is only initialized
+ // when the object is value-initialized (e.g. average<T>()).
+ y_ y;
+ };
+
+}// detail
+}// goodness_of_fit
+
+ // Trains the example estimator: sets e.y to the mean of the values that
+ // binary_op::tails extracts from data (presumably the second element of
+ // each tuple - confirm against binary_op).
+ //   e    : estimator to update; only e.y is written.
+ //   data : range of tuples; read through boost::begin/boost::end.
+ // NOTE(review): the includes visible in this header do not name
+ // <algorithm> or <iterator>; std::for_each and std::back_inserter rely on
+ // another header providing them.
+ template<typename T,typename R>
+ void train(
+ goodness_of_fit::detail::average<T>& e,
+ const R& data
+ ){
+ typedef goodness_of_fit::detail::average<T> average_;
+ typedef typename average_::y_ y_;
+ typedef typename average_::mean_var_acc_ acc_;
+
+ // Collect the extracted values into a temporary buffer.
+ std::vector<y_> vec_y;
+ binary_op::tails(
+ boost::begin(data),
+ boost::end(data),
+ std::back_inserter(vec_y)
+ );
+
+ // std::for_each returns the function object it was given, i.e. the
+ // accumulator after it has been applied to every element of vec_y.
+ e.y = boost::accumulators::mean(
+ std::for_each(
+ boost::begin(vec_y),
+ boost::end(vec_y),
+ acc_()
+ )
+ );
+
+ }
+
+ // Returns the value stored in the estimator (e.y). The second argument is
+ // accepted but never read, so the result is the same for every input.
+ template<typename T,typename X>
+ typename goodness_of_fit::detail::average<T>::y_
+ estimate(
+ const goodness_of_fit::detail::average<T>& e,
+ const X& /* x : unused */
+ ){
+ return e.y;
+ }
+
+}// statistics
+}// boost
+
+#endif
\ No newline at end of file

Added: sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/example/k_fold.cpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/example/k_fold.cpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,139 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::example::k_fold.cpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#include <algorithm>
+#include <iterator>
+#include <boost/tuple/tuple.hpp>
+#include <boost/foreach.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/format.hpp>
+#include <boost/arithmetic/equal.hpp>
+#include <boost/mpl/nested_type.hpp>
+#include <boost/binary_op/include.hpp>
+#include <boost/dist_random/distributions/normal.hpp>
+#include <boost/standard_distribution/distributions/normal.hpp>
+#include <boost/dist_random/random/include.hpp>
+
+#include <libs/statistics/goodness_of_fit/example/average.hpp>
+#include <boost/statistics/goodness_of_fit/include.hpp>
+
+//#include <boost/statistics/goodness_of_fit/data/k_fold_data.hpp>
+//#include <boost/statistics/goodness_of_fit/algorithm/k_fold_regression_estimate.hpp>
+//#include <boost/statistics/goodness_of_fit/algorithm/sqrt_mse.hpp>
+//#include <boost/statistics/goodness_of_fit/algorithm/mean_abs_error.hpp>
+
+#include <libs/statistics/goodness_of_fit/example/k_fold.h>
+
+// Example driver: builds k-fold regression data from a small sample, checks
+// the test folds against the original data, computes k-fold regression
+// estimates with the example `average` estimator, and writes sqrt_mse,
+// mean_abs_error and the estimates to out.
+void example_k_fold(std::ostream& out)
+{
+
+ // This example shows how to generate k-fold regression data (a sequence of
+ // (x,y) tuples), and use it to generate k-fold regression estimates.
+ using namespace boost;
+ namespace stat = boost::statistics;
+ namespace gof = stat::goodness_of_fit;
+
+ typedef mt19937 urng_;
+ typedef double val_;
+ typedef std::vector<val_> vals_;
+ typedef math::normal_distribution<val_> dist_;
+ // Range of (x,y) tuples built over two vectors held by const reference.
+ typedef binary_op::tuple_range<const vals_&,const vals_&>
+ meta_tuple_range_;
+ typedef meta_tuple_range_::type tuple_range_;
+ typedef range_value<tuple_range_>::type tuple_ref_cv_;
+ typedef binary_op::tuple_remove_ref_cv<
+ tuple_ref_cv_
+ >::type tuple_;
+
+ typedef gof::detail::average<tuple_> estimator_;
+ typedef gof::k_fold_data<tuple_> k_fold_data_;
+ typedef k_fold_data_::test_data_type test_data_;
+
+ // n = 10 sample points, k = 5 folds (written as floating-point literals).
+ const unsigned n = 1e1;
+ const unsigned k = 5e0;
+ // presumably k_fold_data needs equally sized folds - confirm.
+ BOOST_ASSERT(n % k == 0);
+
+ vals_ vec_x;
+ vals_ vec_y;
+ dist_ dist;
+ urng_ urng;
+
+ // Not std::generate_n (four arguments); presumably the dist_random
+ // overload that appends n draws from dist using urng - confirm.
+ generate_n(
+ std::back_inserter(vec_x), n,dist, urng
+ );
+ // The response is a copy of the regressor, so y == x for every tuple.
+ vec_y = vec_x;
+
+ tuple_range_ vec_xy = meta_tuple_range_::make(
+ vec_x,
+ vec_y
+ );
+
+ k_fold_data_ k_fold_data(
+ k,
+ boost::begin(vec_xy),
+ boost::end(vec_xy)
+ );
+
+ out << k_fold_data;
+
+ // Tests that concatenating the test data over all k increments is
+ // identical to vec_xy. The second pass runs after initialize() and repeats
+ // the same checks on the re-initialized object.
+ for(unsigned i = 0; i<2; i++){
+ if(i>0){ k_fold_data.initialize(); }
+
+ typedef range_iterator<tuple_range_>::type tuples_it_;
+ tuples_it_ tuples_it = boost::begin( vec_xy );
+ while(k_fold_data.j()<k_fold_data.k()){
+ typedef range_value<test_data_>::type t_;
+ BOOST_FOREACH(const t_& t,k_fold_data.test_data())
+ {
+ val_ x = get<0>(t);
+ val_ x1 = get<0>(*tuples_it);
+ val_ y = get<1>(t);
+ val_ y1 = get<1>(*tuples_it);
+ // BOOST_ASSERT compiles to nothing when NDEBUG is defined, so these
+ // checks only run in debug builds.
+ BOOST_ASSERT( arithmetic_tools::equal(x,x1) );
+ BOOST_ASSERT( arithmetic_tools::equal(y,y1) );
+ ++tuples_it;
+ };
+ k_fold_data.increment();
+ }
+ out << std::endl << k_fold_data;
+ }
+
+ // One output slot per tuple; initialize() is called again before estimating.
+ vals_ est_vec_y( size(vec_xy) );
+ k_fold_data.initialize();
+ out << std::endl;
+ gof::regression_estimate(
+ k_fold_data,
+ estimator_(),
+ boost::begin(est_vec_y)
+ );
+
+ // Error measures of the estimates against the original responses.
+ val_ sqrt_mse = statistics::goodness_of_fit::sqrt_mse(
+ boost::begin( est_vec_y ),
+ boost::end( est_vec_y ),
+ boost::begin( vec_y )
+ );
+ val_ mae = statistics::goodness_of_fit::mean_abs_error(
+ boost::begin( est_vec_y ),
+ boost::end( est_vec_y ),
+ boost::begin( vec_y )
+ );
+
+ out << ( format("sqrt_mse = %1%")%sqrt_mse ).str() << std::endl;
+ out << ( format("mae = %1%")%mae ).str() << std::endl;
+
+ out << "k-fold regression estimates :";
+ std::copy(
+ boost::begin( est_vec_y ),
+ boost::end( est_vec_y ),
+ std::ostream_iterator<val_>(out," ")
+ );
+ // Note that average is a marginal estimate (it ignores x), so expect the
+ // estimates to come out in runs of equal values: aabbccdd etc.
+}

Added: sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/example/k_fold.h
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/example/k_fold.h 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,15 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::example::k_fold.h //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef LIBS_STATISTICS_GOODNESS_OF_FIT_EXAMPLE_K_FOLD_H_ER_2009
+#define LIBS_STATISTICS_GOODNESS_OF_FIT_EXAMPLE_K_FOLD_H_ER_2009
+#include <ostream>
+
+// Runs the k-fold example and writes its output to the given stream.
+void example_k_fold(std::ostream&);
+
+#endif
\ No newline at end of file

Added: sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/src/main.cpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/goodness_of_fit/libs/statistics/goodness_of_fit/src/main.cpp 2009-08-27 20:50:10 EDT (Thu, 27 Aug 2009)
@@ -0,0 +1,18 @@
+//////////////////////////////////////////////////////////////////////////////
+// statistics::goodness_of_fit::src::main.cpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#include <iostream>
+#include <libs/statistics/goodness_of_fit/example/k_fold.h>
+
+// Program entry point: runs the k-fold example with std::cout as its output
+// stream and returns 0.
+int main()
+{
+
+ example_k_fold(std::cout);
+
+ return 0;
+}
\ No newline at end of file


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk