Subject: [Boost-commit] svn:boost r56586 - in sandbox/statistics/kernel/libs/statistics/kernel: example src
From: erwann.rogard_at_[hidden]
Date: 2009-10-04 18:55:00


Author: e_r
Date: 2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
New Revision: 56586
URL: http://svn.boost.org/trac/boost/changeset/56586

Log:
m
Text files modified:
   sandbox/statistics/kernel/libs/statistics/kernel/example/benchmark_scalar.cpp | 15 +--
   sandbox/statistics/kernel/libs/statistics/kernel/example/kernel_mono_rp.cpp | 53 +++++++------
   sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_nw.cpp | 158 +++++++++++++++++++++++----------------
   sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_rp.cpp | 32 ++++----
   sandbox/statistics/kernel/libs/statistics/kernel/src/main.cpp | 9 +-
   5 files changed, 150 insertions(+), 117 deletions(-)

Modified: sandbox/statistics/kernel/libs/statistics/kernel/example/benchmark_scalar.cpp
==============================================================================
--- sandbox/statistics/kernel/libs/statistics/kernel/example/benchmark_scalar.cpp (original)
+++ sandbox/statistics/kernel/libs/statistics/kernel/example/benchmark_scalar.cpp 2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
@@ -5,6 +5,8 @@
 // Software License, Version 1.0. (See accompanying file //
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
 ///////////////////////////////////////////////////////////////////////////////
+/*
+
 #include <cmath>
 #include <vector>
 #include <algorithm>
@@ -19,14 +21,9 @@
 #include <boost/math/tools/precision.hpp>
 
 // Order of the files matters!
-#include <boost/standard_distribution/distributions/normal.hpp>
-#include <boost/scalar_dist/fun_wrap/pdf.hpp>
-#include <boost/dist_random/distributions/normal.hpp>
-#include <boost/dist_random/random/generate_n.hpp>
-
-//#include <boost/scalar_dist/fun_wrap/pdf.hpp>
-//#include <boost/scalar_dist/meta/delegate.hpp>
-//#include <boost/scalar_dist/algorithm/transform.hpp>
+#include <boost/statistics/detail/distribution_toolkit/distributions/normal.hpp>
+#include <boost/statistics/detail/distribution_toolkit/fwd_math/cdf.hpp> // ?!
+
 
 #include <boost/binary_op/data/tuple_range.hpp>
 
@@ -268,7 +265,9 @@
     // DO the same for rp
     
     out << "<-" << std::endl;
+
 }
+*/
 
 
 

Modified: sandbox/statistics/kernel/libs/statistics/kernel/example/kernel_mono_rp.cpp
==============================================================================
--- sandbox/statistics/kernel/libs/statistics/kernel/example/kernel_mono_rp.cpp (original)
+++ sandbox/statistics/kernel/libs/statistics/kernel/example/kernel_mono_rp.cpp 2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
@@ -21,32 +21,40 @@
 #include <boost/statistics/kernel/functional/rp_visitor.hpp>
 #include <boost/statistics/kernel/functional/estimator.hpp>
 #include <libs/statistics/kernel/example/scalar_rp.h>
-
 void example_kernel_mono_rp(std::ostream& out){
     out << "-> example_kernel_mono_rp : ";
+
+
+ // This example shows how to compute a Rosenblatt-Parzen estimate of the
+ // density, p(x). The type used for each data-unit, x, is a vector of
+ // doubles, and the kernel uses the same bandwidth throughout all
+ // coordinates
+
     using namespace boost;
+ namespace kernel = boost::statistics::kernel;
     
     // Types
     typedef double val_;
     typedef std::vector<val_> vec_;
- typedef std::vector<vec_> mat_;
+ typedef vec_ x_;
+ typedef std::vector<x_> dataset_;
     typedef mt19937 urng_;
     typedef normal_distribution<val_> norm_;
     typedef variate_generator<urng_&,norm_> gen_;
- typedef statistics::kernel::scalar::gaussian<val_> gauss_k_;
+ typedef kernel::scalar::gaussian<val_> gauss_k_;
 
     const unsigned dim = 2;
- typedef statistics::kernel::joint::kernel_mono<gauss_k_,dim> kernel_mono_k_;
- // NB const vec_&, not vec_
- typedef statistics::kernel::rp_visitor<kernel_mono_k_,const vec_&> rp_visitor_;
+ typedef kernel::joint::kernel_mono<gauss_k_,dim> kernel_mono_k_;
+ // Use of a const reference is not necessary but probably improves speed
+ typedef kernel::rp_visitor<kernel_mono_k_,const x_&> rp_visitor_;
     
     // Constants
     const val_ bandwidth = 0.5;
     const val_ eps = math::tools::epsilon<val_>();
     const unsigned n = 10;
     
- // Generate sample
- mat_ vec_x; vec_x.reserve(n);
+ // Generate n samples, each drawn from prod{N(0,1):i=1,...,dim}
+ dataset_ dataset; dataset.reserve(n);
     vec_ vec_rp; vec_rp.reserve(n);
     urng_ urng;
     norm_ norm;
@@ -54,41 +62,38 @@
     for(unsigned i = 0; i<n; i++){
         vec_ tmp(dim);
         std::generate_n(
- begin(tmp),
+ boost::begin(tmp),
             dim,
             gen
         );
- vec_x.push_back( tmp );
+ dataset.push_back( tmp );
     }
 
- kernel_mono_k_ kernel_mono_k(bandwidth);
-
- kernel_mono_k(vec_x[0],vec_x[1]);
- // Density estimate for each x in vec_x using vec_x as the sample
- BOOST_FOREACH(const vec_& x,vec_x){
+ // Density estimate for each x in dataset
+ BOOST_FOREACH(const x_& x,dataset){
         val_ rp = std::for_each(
- begin(vec_x),
- end(vec_x),
+ boost::begin(dataset),
+ boost::end(dataset),
             rp_visitor_(bandwidth,x)
         ).estimate();
         vec_rp.push_back(rp);
     }
- typedef sub_range<mat_> sub_;
- typedef statistics::kernel::estimator<
+ typedef sub_range<dataset_> sub_;
+ typedef kernel::estimator<
         sub_,
- statistics::kernel::rp_visitor,
+ kernel::rp_visitor,
         kernel_mono_k_
> estimator_;
     estimator_ estimator(bandwidth);
- statistics::train(estimator,sub_(vec_x));
+ estimator.train(sub_(dataset));
     vec_ vec_rp2; vec_rp2.reserve(n);
 
     // Same as previous but calls estimator instead of for_each
     for(unsigned i = 0; i<n; i++){
- vec_ x = vec_x[i];
+ x_ x = dataset[i];
         val_ rp = vec_rp[i];
- val_ rp2 = estimator(x).estimate();
+ val_ rp2 = estimator.predict(x);
         BOOST_ASSERT(fabs(rp-rp2)<eps);
     }
     out << "<-" << std::endl;
-}
\ No newline at end of file
+}
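
For reference, a minimal standalone sketch of the estimate that kernel_mono_rp.cpp computes: a Rosenblatt-Parzen density estimate built from a product of scalar Gaussian kernels sharing one bandwidth h across all coordinates. Standard library only; the names below are illustrative and are not part of the statistics::kernel API.

#include <cmath>
#include <cstddef>
#include <vector>

// K(u) = standard normal pdf
double gauss_kernel(double u){
    static const double pi = 3.14159265358979323846;
    return std::exp(-0.5*u*u)/std::sqrt(2.0*pi);
}

// p_hat(x) = (1/n) * sum_i prod_j K((x[j]-data[i][j])/h)/h
double rp_estimate(const std::vector<std::vector<double> >& data,
                   const std::vector<double>& x, double h){
    double sum = 0.0;
    for(std::size_t i = 0; i<data.size(); ++i){
        double prod = 1.0;
        for(std::size_t j = 0; j<x.size(); ++j){
            prod *= gauss_kernel((x[j]-data[i][j])/h)/h;
        }
        sum += prod;
    }
    return sum/static_cast<double>(data.size());
}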

Modified: sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_nw.cpp
==============================================================================
--- sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_nw.cpp (original)
+++ sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_nw.cpp 2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
@@ -17,28 +17,59 @@
 #include <boost/math/tools/precision.hpp>
 #include <boost/typeof/typeof.hpp>
 
-#include <boost/binary_op/data/tuple_range.hpp>
-//#include <boost/statistics/estimator_concept/trainable_estimator/concept.hpp>
+#include <boost/fusion/sequence/intrinsic/at_key.hpp>
+#include <boost/fusion/include/at_key.hpp>
+#include <boost/fusion/container/map.hpp>
+#include <boost/fusion/include/map.hpp>
+#include <boost/fusion/include/map_fwd.hpp>
+
+
+#include <boost/statistics/detail/fusion/functor/at_key.hpp>
+
 #include <boost/statistics/kernel/scalar/gaussian.hpp>
-#include <boost/statistics/kernel/functional/nw_visitor_tuple.hpp>
+#include <boost/statistics/kernel/functional/meta_nw_visitor_unary.hpp>
 #include <boost/statistics/kernel/functional/estimator.hpp>
 #include <libs/statistics/kernel/example/scalar_nw.h>
 
 void example_scalar_nw(std::ostream& out){
+
     out << "-> example_scalar_nw : ";
     using namespace boost;
 
+ namespace kernel = boost::statistics::kernel;
+
+ // This example shows how to compute a Nadaraya-Watson estimate of E[y|x].
+ // The type used for each data-unit, here, is a fusion map whose x and y
+ // components are accessed using keys
+
     // Types
     typedef double val_;
- typedef std::vector<val_> vec_;
+ typedef std::vector<val_> vals_;
+ typedef mpl::int_<0> key_x_;
+ typedef mpl::int_<1> key_y_;
+ typedef fusion::pair<key_x_,val_> x_;
+ typedef fusion::pair<key_y_,val_> y_;
+ typedef statistics::detail::fusion::functor::at_key<key_x_> at_key_x_;
+ typedef statistics::detail::fusion::functor::at_key<key_y_> at_key_y_;
+ typedef fusion::map<x_,y_> data_unit_;
+ typedef std::vector<data_unit_> dataset_;
+ // The rationale for data_range_ is it's cheap to copy
+ typedef sub_range<dataset_> data_range_;
+
     typedef mt19937 urng_;
     typedef normal_distribution<val_> norm_;
     typedef variate_generator<urng_&,norm_> gen_;
- typedef statistics::kernel::scalar::gaussian<val_> gauss_k_;
- typedef statistics::kernel::nw_visitor_tuple<gauss_k_,val_>
- nw_visitor_tuple_;
- typedef nw_visitor_tuple_::nw_visitor_type nw_visitor_;
- typedef nw_visitor_tuple_::rp_visitor_type rp_visitor_;
+ typedef kernel::scalar::gaussian<val_> gauss_k_;
+ typedef kernel::meta_nw_visitor_unary<
+ at_key_x_,
+ at_key_y_
+ > meta_nw_visitor_u_;
+ typedef meta_nw_visitor_u_::apply<
+ gauss_k_,
+ val_
+ >::type nw_visitor_u_;
+ typedef nw_visitor_u_::nw_visitor_type nw_visitor_;
+ typedef nw_visitor_u_::rp_visitor_type rp_visitor_;
     
     // Constants
     const val_ bandwidth = 0.5;
@@ -46,73 +77,70 @@
     const unsigned n = 10;
     
     // Initialization
- vec_ vec_x(n);
- vec_ vec_y(n,static_cast<val_>(1));
- vec_ vec_rp; vec_rp.reserve(n);
- vec_ vec_nw; vec_nw.reserve(n);
- urng_ urng;
- norm_ norm;
- gen_ gen(urng,norm);
- std::generate_n(
- begin(vec_x),
- n,
- gen
- );
-
- // Computes a conditional mean estimate (nw) for each x in vec_x using
- // a sequence of (x,y) tuples constructed from (vec_x,vec_y) as training
- // sample. The density (rp) is computed as a by-product.
- // Here, y = 1, so we should have rp = nw (un-normalized).
- BOOST_FOREACH(val_& x,vec_x){
- typedef binary_op::tuple_range<const vec_&,const vec_&> factory_;
- typedef factory_::type range_tuple_;
- range_tuple_ range_tuple = factory_::make(vec_x,vec_y);
+ vals_ vec_rp; vec_rp.reserve(n);
+ vals_ vec_nw; vec_nw.reserve(n);
+ dataset_ dataset;
+ dataset.reserve(n);
+ {
+ urng_ urng;
+ norm_ norm;
+ gen_ gen(urng,norm);
+ val_ one = static_cast<val_>(1);
+ for(unsigned i = 0; i<n; i++){
+ dataset.push_back(
+ data_unit_(
+ fusion::make_pair<key_x_>(gen()),
+ fusion::make_pair<key_y_>(one)
+ )
+ );
+ }
+ }
+
+ // Computes nw = E[y|x] for each x in the dataset. The density (rp) is
+ // obtained as a by-product. Here, y = 1, so we should have
+ // rp = nw (un-normalized).
+ BOOST_FOREACH(data_unit_& u,dataset){
         nw_visitor_ nw_visitor = std::for_each(
- begin(range_tuple),
- end(range_tuple),
- nw_visitor_tuple_(bandwidth,x)
- ).nw_visitor();
+ boost::begin(dataset),
+ boost::end(dataset),
+ nw_visitor_u_(
+ bandwidth,
+ fusion::at_key<key_x_>(u)
+ )
+ );
         val_ u_nw = nw_visitor.unnormalized_estimate();
         vec_nw.push_back(u_nw);
         rp_visitor_ rp_visitor = nw_visitor.rp_visitor();
         val_ rp = rp_visitor.estimate();
         BOOST_ASSERT(fabs(rp-u_nw)<eps);
- }
-
- typedef binary_op::tuple_range<const vec_&,const vec_&> factory_;
- typedef factory_::type range_xy_;
- range_xy_ range_xy = factory_::make(vec_x,vec_y);
- // A pair of iterators is cheap to copy so no need to pass it by reference
- typedef statistics::kernel::estimator<
- range_xy_,
- statistics::kernel::nw_visitor_tuple,
+ }
+
+ // Same as above using estimator
+
+ typedef kernel::estimator<
+ data_range_,
+ meta_nw_visitor_u_::apply,
         gauss_k_
> estimator_;
     estimator_ estimator(bandwidth);
- statistics::train(estimator,range_xy);
-
- // Same as previous but calls estimator instead of for_each
- BOOST_FOREACH(val_& x,vec_x){
- // A local definition of nw_visitor_ is needed because x is passed
- // by ref, not by value as in that outside the scope
- typedef estimator_::result<val_>::type result_type;
- typedef result_type::nw_visitor_type nw_visitor_;
- typedef result_type::rp_visitor_type rp_visitor_;
- nw_visitor_ nw_visitor = estimator(x).nw_visitor();
- val_ u_nw = nw_visitor.unnormalized_estimate();
- rp_visitor_ rp_visitor = nw_visitor.rp_visitor();
- val_ rp = rp_visitor.estimate();
- BOOST_ASSERT(fabs(rp-u_nw)<eps);
- }
+ estimator.train(
+ data_range_(dataset)
+ ); // * step 1 *
+
     
- // Shorter version of the above
- BOOST_FOREACH(val_& x,vec_x){
- BOOST_AUTO( nw_visitor , estimator(x).nw_visitor() );
- val_ u_nw = nw_visitor.unnormalized_estimate();
- BOOST_AUTO( rp_visitor , nw_visitor.rp_visitor() );
- val_ rp = rp_visitor.estimate();
+ BOOST_FOREACH(data_unit_& u,dataset){
+ // -> these steps are independent of step2, they're just a test
+ val_ x = fusion::at_key<key_x_>(u);
+ BOOST_AUTO( nw_v , estimator.visit(x) );
+ val_ u_nw = nw_v.unnormalized_estimate();
+ BOOST_AUTO( rp_v , nw_v.rp_visitor() );
+ val_ rp = rp_v.estimate();
         BOOST_ASSERT(fabs(rp-u_nw)<eps);
+ // <-
+
+ estimator.predict(x); // * step 2 *
+
     }
     
     out << "<-" << std::endl;
-}
\ No newline at end of file
+}
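
For reference, a minimal standalone sketch of the Nadaraya-Watson estimate of E[y|x] that scalar_nw.cpp computes with a scalar Gaussian kernel. Standard library only; the names below are illustrative and are not part of the statistics::kernel API. When every y_i equals 1, the kernel-weighted numerator coincides (up to normalization) with the Rosenblatt-Parzen density estimate, which is what the example's BOOST_ASSERT checks.

#include <cmath>
#include <cstddef>
#include <utility>
#include <vector>

// K(u) = standard normal pdf
double gauss_kernel(double u){
    static const double pi = 3.14159265358979323846;
    return std::exp(-0.5*u*u)/std::sqrt(2.0*pi);
}

// nw(x) = sum_i K((x-x_i)/h)*y_i / sum_i K((x-x_i)/h)
double nw_estimate(const std::vector<std::pair<double,double> >& xy, // (x_i, y_i)
                   double x, double h){
    double num = 0.0, den = 0.0;
    for(std::size_t i = 0; i<xy.size(); ++i){
        double w = gauss_kernel((x - xy[i].first)/h);
        num += w * xy[i].second;
        den += w;
    }
    return num/den; // assumes den > 0, i.e. at least one observation
}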

Modified: sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_rp.cpp
==============================================================================
--- sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_rp.cpp (original)
+++ sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_rp.cpp 2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
@@ -15,6 +15,7 @@
 #include <boost/random/variate_generator.hpp>
 #include <boost/math/special_functions/fpclassify.hpp> //needed?
 #include <boost/math/tools/precision.hpp>
+#include <boost/mpl/int.hpp>
 
 #include <boost/statistics/kernel/scalar/gaussian.hpp>
 #include <boost/statistics/kernel/functional/rp_visitor.hpp>
@@ -25,6 +26,9 @@
     out << "-> example_scalar_rp : ";
     using namespace boost;
 
+ // This example shows how to compute a Rosenblatt-Parzen estimate of the
+ // density, p(x). The type used for each data-unit, here, is double
+
     //Types
     typedef double val_;
     typedef std::vector<val_> vec_;
@@ -40,33 +44,28 @@
     const unsigned n = 10;
 
     // Initialization
- vec_ vec_x(n);
+ vec_ dataset(n);
     vec_ vec_rp; vec_rp.reserve(n);
     urng_ urng;
     norm_ norm;
     gen_ gen(urng,norm);
     std::generate_n(
- begin(vec_x),
+ begin(dataset),
         n,
         gen
     );
 
- // Computes a density estimate for each x in vec_x using vec_x as sample
- BOOST_FOREACH(val_& x,vec_x){
+ // Computes a density estimate for each x in dataset
+ BOOST_FOREACH(val_& x,dataset){
         val_ rp = for_each(
- begin(vec_x),
- end(vec_x),
+ boost::begin(dataset),
+ boost::end(dataset),
             rp_visitor_(bandwidth,x)
         ).estimate();
         vec_rp.push_back(rp);
     }
 
- std::copy(
- begin(vec_rp),
- end(vec_rp),
- std::ostream_iterator<val_>(out," ")
- );
-
+ // Same as previous but calls estimator instead of for_each
     typedef sub_range<vec_> sub_x_;
     typedef
         statistics::kernel::estimator<
@@ -75,16 +74,17 @@
             gauss_k_
> estimator_;
     estimator_ estimator(bandwidth);
- statistics::train(estimator,sub_x_(vec_x));
+ sub_x_ sub_x(dataset);
+ estimator.train(sub_x);
     vec_ vec_rp2; vec_rp2.reserve(n);
     
- // Same as previous but calls estimator instead of for_each
     for(unsigned i = 0; i<n; i++){
- val_ x = vec_x[i];
+ val_ x = dataset[i];
         val_ rp = vec_rp[i];
- val_ rp2 = estimator(x).estimate();
+ val_ rp2 = estimator.predict(x);
         BOOST_ASSERT(fabs(rp-rp2)<eps);
     }
             
     out << "<-" << std::endl;
+
 }
\ No newline at end of file
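
The scalar case used by scalar_rp.cpp is the one-dimensional specialization of the product-kernel sketch above; a minimal standalone version (standard library only, illustrative names):

#include <cmath>
#include <cstddef>
#include <vector>

// p_hat(x) = (1/(n*h)) * sum_i K((x - data[i])/h), K = standard normal pdf
double rp_estimate_1d(const std::vector<double>& data, double x, double h){
    static const double pi = 3.14159265358979323846;
    double sum = 0.0;
    for(std::size_t i = 0; i<data.size(); ++i){
        double u = (x - data[i])/h;
        sum += std::exp(-0.5*u*u)/std::sqrt(2.0*pi);
    }
    return sum/(static_cast<double>(data.size())*h);
}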

Modified: sandbox/statistics/kernel/libs/statistics/kernel/src/main.cpp
==============================================================================
--- sandbox/statistics/kernel/libs/statistics/kernel/src/main.cpp (original)
+++ sandbox/statistics/kernel/libs/statistics/kernel/src/main.cpp 2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
@@ -11,12 +11,13 @@
 #include <libs/statistics/kernel/example/kernel_mono_rp.h>
 #include <libs/statistics/kernel/example/benchmark_scalar.h>
 
+
 int main(){
 
- // example_scalar_rp(std::cout);
- // example_scalar_nw(std::cout);
- // example_kernel_mono_rp(std::cout);
- example_benchmark_scalar(std::cout);
+ example_scalar_rp(std::cout);
+ example_scalar_nw(std::cout);
+ example_kernel_mono_rp(std::cout);
+ //example_benchmark_scalar(std::cout);
     
     return 0;
 }
\ No newline at end of file

