Boost-Commit :
Subject: [Boost-commit] svn:boost r56586 - in sandbox/statistics/kernel/libs/statistics/kernel: example src
From: erwann.rogard_at_[hidden]
Date: 2009-10-04 18:55:00
Author: e_r
Date: 2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
New Revision: 56586
URL: http://svn.boost.org/trac/boost/changeset/56586
Log:
m
Text files modified:
sandbox/statistics/kernel/libs/statistics/kernel/example/benchmark_scalar.cpp | 15 +--
sandbox/statistics/kernel/libs/statistics/kernel/example/kernel_mono_rp.cpp | 53 +++++++------
sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_nw.cpp | 158 +++++++++++++++++++++++----------------
sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_rp.cpp | 32 ++++----
sandbox/statistics/kernel/libs/statistics/kernel/src/main.cpp | 9 +-
5 files changed, 150 insertions(+), 117 deletions(-)
Modified: sandbox/statistics/kernel/libs/statistics/kernel/example/benchmark_scalar.cpp
==============================================================================
--- sandbox/statistics/kernel/libs/statistics/kernel/example/benchmark_scalar.cpp (original)
+++ sandbox/statistics/kernel/libs/statistics/kernel/example/benchmark_scalar.cpp 2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
@@ -5,6 +5,8 @@
// Software License, Version 1.0. (See accompanying file //
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
///////////////////////////////////////////////////////////////////////////////
+/*
+
#include <cmath>
#include <vector>
#include <algorithm>
@@ -19,14 +21,9 @@
#include <boost/math/tools/precision.hpp>
// Order of the files matters!
-#include <boost/standard_distribution/distributions/normal.hpp>
-#include <boost/scalar_dist/fun_wrap/pdf.hpp>
-#include <boost/dist_random/distributions/normal.hpp>
-#include <boost/dist_random/random/generate_n.hpp>
-
-//#include <boost/scalar_dist/fun_wrap/pdf.hpp>
-//#include <boost/scalar_dist/meta/delegate.hpp>
-//#include <boost/scalar_dist/algorithm/transform.hpp>
+#include <boost/statistics/detail/distribution_toolkit/distributions/normal.hpp>
+#include <boost/statistics/detail/distribution_toolkit/fwd_math/cdf.hpp> // ?!
+
#include <boost/binary_op/data/tuple_range.hpp>
@@ -268,7 +265,9 @@
// DO the same for rp
out << "<-" << std::endl;
+
}
+*/
Modified: sandbox/statistics/kernel/libs/statistics/kernel/example/kernel_mono_rp.cpp
==============================================================================
--- sandbox/statistics/kernel/libs/statistics/kernel/example/kernel_mono_rp.cpp (original)
+++ sandbox/statistics/kernel/libs/statistics/kernel/example/kernel_mono_rp.cpp 2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
@@ -21,32 +21,40 @@
#include <boost/statistics/kernel/functional/rp_visitor.hpp>
#include <boost/statistics/kernel/functional/estimator.hpp>
#include <libs/statistics/kernel/example/scalar_rp.h>
-
void example_kernel_mono_rp(std::ostream& out){
out << "-> example_kernel_mono_rp : ";
+
+
+ // This example shows how to compute a Rosenblatt-Parzen estimate of the
+ // density, p(x). The type used for each data-unit, x, is a vector of
+ // doubles, and the kernel uses the same bandwidth throughout all
+ // coordinates
+
using namespace boost;
+ namespace kernel = boost::statistics::kernel;
// Types
typedef double val_;
typedef std::vector<val_> vec_;
- typedef std::vector<vec_> mat_;
+ typedef vec_ x_;
+ typedef std::vector<x_> dataset_;
typedef mt19937 urng_;
typedef normal_distribution<val_> norm_;
typedef variate_generator<urng_&,norm_> gen_;
- typedef statistics::kernel::scalar::gaussian<val_> gauss_k_;
+ typedef kernel::scalar::gaussian<val_> gauss_k_;
const unsigned dim = 2;
- typedef statistics::kernel::joint::kernel_mono<gauss_k_,dim> kernel_mono_k_;
- // NB const vec_&, not vec_
- typedef statistics::kernel::rp_visitor<kernel_mono_k_,const vec_&> rp_visitor_;
+ typedef kernel::joint::kernel_mono<gauss_k_,dim> kernel_mono_k_;
+ // Use of a const reference is not necessary but probably improves speed
+ typedef kernel::rp_visitor<kernel_mono_k_,const x_&> rp_visitor_;
// Constants
const val_ bandwidth = 0.5;
const val_ eps = math::tools::epsilon<val_>();
const unsigned n = 10;
- // Generate sample
- mat_ vec_x; vec_x.reserve(n);
+ // Generate n samples, each drawn from prod{N(0,1):i=1,...,dim}
+ dataset_ dataset; dataset.reserve(n);
vec_ vec_rp; vec_rp.reserve(n);
urng_ urng;
norm_ norm;
@@ -54,41 +62,38 @@
for(unsigned i = 0; i<n; i++){
vec_ tmp(dim);
std::generate_n(
- begin(tmp),
+ boost::begin(tmp),
dim,
gen
);
- vec_x.push_back( tmp );
+ dataset.push_back( tmp );
}
- kernel_mono_k_ kernel_mono_k(bandwidth);
-
- kernel_mono_k(vec_x[0],vec_x[1]);
- // Density estimate for each x in vec_x using vec_x as the sample
- BOOST_FOREACH(const vec_& x,vec_x){
+ // Density estimate for each x in dataset
+ BOOST_FOREACH(const x_& x,dataset){
val_ rp = std::for_each(
- begin(vec_x),
- end(vec_x),
+ boost::begin(dataset),
+ boost::end(dataset),
rp_visitor_(bandwidth,x)
).estimate();
vec_rp.push_back(rp);
}
- typedef sub_range<mat_> sub_;
- typedef statistics::kernel::estimator<
+ typedef sub_range<dataset_> sub_;
+ typedef kernel::estimator<
sub_,
- statistics::kernel::rp_visitor,
+ kernel::rp_visitor,
kernel_mono_k_
> estimator_;
estimator_ estimator(bandwidth);
- statistics::train(estimator,sub_(vec_x));
+ estimator.train(sub_(dataset));
vec_ vec_rp2; vec_rp2.reserve(n);
// Same as previous but calls estimator instead of for_each
for(unsigned i = 0; i<n; i++){
- vec_ x = vec_x[i];
+ x_ x = dataset[i];
val_ rp = vec_rp[i];
- val_ rp2 = estimator(x).estimate();
+ val_ rp2 = estimator.predict(x);
BOOST_ASSERT(fabs(rp-rp2)<eps);
}
out << "<-" << std::endl;
-}
\ No newline at end of file
+}
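
For reference, the comment added to kernel_mono_rp.cpp describes a Rosenblatt-Parzen density estimate in which one Gaussian kernel and one bandwidth are shared across all coordinates. Below is a minimal standalone sketch of that computation; it deliberately avoids the sandbox headers, and the names gauss_pdf and rp_mono_estimate are illustrative only, not part of the library.

// Minimal sketch of a product-kernel Rosenblatt-Parzen estimate with a
// Gaussian kernel and a single bandwidth h shared across coordinates.
// Standalone illustration only; not the sandbox API.
#include <cmath>
#include <cstddef>
#include <vector>

// Standard normal pdf, used as the kernel K(u).
double gauss_pdf(double u){
    const double c = 1.0 / std::sqrt(2.0 * std::acos(-1.0));
    return c * std::exp(-0.5 * u * u);
}

// p_hat(x) = (1/n) * sum_i prod_j (1/h) * K( (x[j] - dataset[i][j]) / h )
double rp_mono_estimate(
    const std::vector< std::vector<double> >& dataset, // n data-units, each of size dim
    const std::vector<double>& x,                      // evaluation point
    double h                                           // common bandwidth
){
    double sum = 0.0;
    for(std::size_t i = 0; i < dataset.size(); ++i){
        double prod = 1.0;
        for(std::size_t j = 0; j < x.size(); ++j){
            prod *= gauss_pdf((x[j] - dataset[i][j]) / h) / h;
        }
        sum += prod;
    }
    return sum / dataset.size();
}

Assuming the library uses the same 1/(n*h^dim) normalization, this should agree with what rp_visitor_ and estimator.predict(x) return in the example, up to the epsilon tolerance used there.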
Modified: sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_nw.cpp
==============================================================================
--- sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_nw.cpp (original)
+++ sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_nw.cpp 2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
@@ -17,28 +17,59 @@
#include <boost/math/tools/precision.hpp>
#include <boost/typeof/typeof.hpp>
-#include <boost/binary_op/data/tuple_range.hpp>
-//#include <boost/statistics/estimator_concept/trainable_estimator/concept.hpp>
+#include <boost/fusion/sequence/intrinsic/at_key.hpp>
+#include <boost/fusion/include/at_key.hpp>
+#include <boost/fusion/container/map.hpp>
+#include <boost/fusion/include/map.hpp>
+#include <boost/fusion/include/map_fwd.hpp>
+
+
+#include <boost/statistics/detail/fusion/functor/at_key.hpp>
+
#include <boost/statistics/kernel/scalar/gaussian.hpp>
-#include <boost/statistics/kernel/functional/nw_visitor_tuple.hpp>
+#include <boost/statistics/kernel/functional/meta_nw_visitor_unary.hpp>
#include <boost/statistics/kernel/functional/estimator.hpp>
#include <libs/statistics/kernel/example/scalar_nw.h>
void example_scalar_nw(std::ostream& out){
+
out << "-> example_scalar_nw : ";
using namespace boost;
+ namespace kernel = boost::statistics::kernel;
+
+ // This example shows how to compute a Nadaraya-Watson estimate of E[y|x].
+ // The type used for each data-unit, here, is a fusion map whose x and y
+ // components are accessed using keys
+
// Types
typedef double val_;
- typedef std::vector<val_> vec_;
+ typedef std::vector<val_> vals_;
+ typedef mpl::int_<0> key_x_;
+ typedef mpl::int_<1> key_y_;
+ typedef fusion::pair<key_x_,val_> x_;
+ typedef fusion::pair<key_y_,val_> y_;
+ typedef statistics::detail::fusion::functor::at_key<key_x_> at_key_x_;
+ typedef statistics::detail::fusion::functor::at_key<key_y_> at_key_y_;
+ typedef fusion::map<x_,y_> data_unit_;
+ typedef std::vector<data_unit_> dataset_;
+ // The rationale for data_range_ is that it is cheap to copy
+ typedef sub_range<dataset_> data_range_;
+
typedef mt19937 urng_;
typedef normal_distribution<val_> norm_;
typedef variate_generator<urng_&,norm_> gen_;
- typedef statistics::kernel::scalar::gaussian<val_> gauss_k_;
- typedef statistics::kernel::nw_visitor_tuple<gauss_k_,val_>
- nw_visitor_tuple_;
- typedef nw_visitor_tuple_::nw_visitor_type nw_visitor_;
- typedef nw_visitor_tuple_::rp_visitor_type rp_visitor_;
+ typedef kernel::scalar::gaussian<val_> gauss_k_;
+ typedef kernel::meta_nw_visitor_unary<
+ at_key_x_,
+ at_key_y_
+ > meta_nw_visitor_u_;
+ typedef meta_nw_visitor_u_::apply<
+ gauss_k_,
+ val_
+ >::type nw_visitor_u_;
+ typedef nw_visitor_u_::nw_visitor_type nw_visitor_;
+ typedef nw_visitor_u_::rp_visitor_type rp_visitor_;
// Constants
const val_ bandwidth = 0.5;
@@ -46,73 +77,70 @@
const unsigned n = 10;
// Initialization
- vec_ vec_x(n);
- vec_ vec_y(n,static_cast<val_>(1));
- vec_ vec_rp; vec_rp.reserve(n);
- vec_ vec_nw; vec_nw.reserve(n);
- urng_ urng;
- norm_ norm;
- gen_ gen(urng,norm);
- std::generate_n(
- begin(vec_x),
- n,
- gen
- );
-
- // Computes a conditional mean estimate (nw) for each x in vec_x using
- // a sequence of (x,y) tuples constructed from (vec_x,vec_y) as training
- // sample. The density (rp) is computed as a by-product.
- // Here, y = 1, so we should have rp = nw (un-normalized).
- BOOST_FOREACH(val_& x,vec_x){
- typedef binary_op::tuple_range<const vec_&,const vec_&> factory_;
- typedef factory_::type range_tuple_;
- range_tuple_ range_tuple = factory_::make(vec_x,vec_y);
+ vals_ vec_rp; vec_rp.reserve(n);
+ vals_ vec_nw; vec_nw.reserve(n);
+ dataset_ dataset;
+ dataset.reserve(n);
+ {
+ urng_ urng;
+ norm_ norm;
+ gen_ gen(urng,norm);
+ val_ one = static_cast<val_>(1);
+ for(unsigned i = 0; i<n; i++){
+ dataset.push_back(
+ data_unit_(
+ fusion::make_pair<key_x_>(gen()),
+ fusion::make_pair<key_y_>(one)
+ )
+ );
+ }
+ }
+
+ // Computes nw = E[y|x] for each x in the dataset. The density (rp) is
+ // obtained as a by-product. Here, y = 1, so we should have
+ // rp = nw (un-normalized).
+ BOOST_FOREACH(data_unit_& u,dataset){
nw_visitor_ nw_visitor = std::for_each(
- begin(range_tuple),
- end(range_tuple),
- nw_visitor_tuple_(bandwidth,x)
- ).nw_visitor();
+ boost::begin(dataset),
+ boost::end(dataset),
+ nw_visitor_u_(
+ bandwidth,
+ fusion::at_key<key_x_>(u)
+ )
+ );
val_ u_nw = nw_visitor.unnormalized_estimate();
vec_nw.push_back(u_nw);
rp_visitor_ rp_visitor = nw_visitor.rp_visitor();
val_ rp = rp_visitor.estimate();
BOOST_ASSERT(fabs(rp-u_nw)<eps);
- }
-
- typedef binary_op::tuple_range<const vec_&,const vec_&> factory_;
- typedef factory_::type range_xy_;
- range_xy_ range_xy = factory_::make(vec_x,vec_y);
- // A pair of iterators is cheap to copy so no need to pass it by reference
- typedef statistics::kernel::estimator<
- range_xy_,
- statistics::kernel::nw_visitor_tuple,
+ }
+
+ // Same as above using estimator
+
+ typedef kernel::estimator<
+ data_range_,
+ meta_nw_visitor_u_::apply,
gauss_k_
> estimator_;
estimator_ estimator(bandwidth);
- statistics::train(estimator,range_xy);
-
- // Same as previous but calls estimator instead of for_each
- BOOST_FOREACH(val_& x,vec_x){
- // A local definition of nw_visitor_ is needed because x is passed
- // by ref, not by value as in that outside the scope
- typedef estimator_::result<val_>::type result_type;
- typedef result_type::nw_visitor_type nw_visitor_;
- typedef result_type::rp_visitor_type rp_visitor_;
- nw_visitor_ nw_visitor = estimator(x).nw_visitor();
- val_ u_nw = nw_visitor.unnormalized_estimate();
- rp_visitor_ rp_visitor = nw_visitor.rp_visitor();
- val_ rp = rp_visitor.estimate();
- BOOST_ASSERT(fabs(rp-u_nw)<eps);
- }
+ estimator.train(
+ data_range_(dataset)
+ ); // * step 1 *
+
- // Shorter version of the above
- BOOST_FOREACH(val_& x,vec_x){
- BOOST_AUTO( nw_visitor , estimator(x).nw_visitor() );
- val_ u_nw = nw_visitor.unnormalized_estimate();
- BOOST_AUTO( rp_visitor , nw_visitor.rp_visitor() );
- val_ rp = rp_visitor.estimate();
+ BOOST_FOREACH(data_unit_& u,dataset){
+ // -> these steps are independent of step 2; they're just a test
+ val_ x = fusion::at_key<key_x_>(u);
+ BOOST_AUTO( nw_v , estimator.visit(x) );
+ val_ u_nw = nw_v.unnormalized_estimate();
+ BOOST_AUTO( rp_v , nw_v.rp_visitor() );
+ val_ rp = rp_v.estimate();
BOOST_ASSERT(fabs(rp-u_nw)<eps);
+ // <-
+
+ estimator.predict(x); // * step 2 *
+
}
out << "<-" << std::endl;
-}
\ No newline at end of file
+}
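
The reworked scalar_nw.cpp computes the Nadaraya-Watson estimate of E[y|x] and asserts that, because every y is set to 1, the unnormalized estimate coincides with the Rosenblatt-Parzen density. Below is a minimal standalone sketch of that identity, assuming a scalar Gaussian kernel; nw_result and nw_estimate are hypothetical names, not the library API.

// Minimal sketch of the Nadaraya-Watson computation the example above tests.
// Standalone illustration only; not the sandbox API.
#include <cmath>
#include <cstddef>
#include <vector>

double gauss_kernel(double u){
    const double c = 1.0 / std::sqrt(2.0 * std::acos(-1.0));
    return c * std::exp(-0.5 * u * u);
}

struct nw_result{
    double unnormalized; // (1/(n*h)) * sum_i K((x - xs[i])/h) * ys[i]
    double density;      // (1/(n*h)) * sum_i K((x - xs[i])/h)   (Rosenblatt-Parzen)
    double estimate() const { return unnormalized / density; }   // E[y|x]
};

nw_result nw_estimate(
    const std::vector<double>& xs,
    const std::vector<double>& ys,
    double x,
    double h
){
    nw_result r = {0.0, 0.0};
    const std::size_t n = xs.size();
    for(std::size_t i = 0; i < n; ++i){
        double w = gauss_kernel((x - xs[i]) / h) / h;
        r.density      += w;
        r.unnormalized += w * ys[i];
    }
    r.density      /= n;
    r.unnormalized /= n;
    return r;
}

Since estimate() divides the weighted sum by the density, setting ys[i] = 1 makes unnormalized and density identical, which is exactly what the BOOST_ASSERT in the example verifies up to epsilon.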
Modified: sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_rp.cpp
==============================================================================
--- sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_rp.cpp (original)
+++ sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_rp.cpp 2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
@@ -15,6 +15,7 @@
#include <boost/random/variate_generator.hpp>
#include <boost/math/special_functions/fpclassify.hpp> //needed?
#include <boost/math/tools/precision.hpp>
+#include <boost/mpl/int.hpp>
#include <boost/statistics/kernel/scalar/gaussian.hpp>
#include <boost/statistics/kernel/functional/rp_visitor.hpp>
@@ -25,6 +26,9 @@
out << "-> example_scalar_rp : ";
using namespace boost;
+ // This example shows how to compute a Rosenblatt-Parzen estimate of the
+ // density, p(x). The type used for each data-unit, here, is double
+
//Types
typedef double val_;
typedef std::vector<val_> vec_;
@@ -40,33 +44,28 @@
const unsigned n = 10;
// Initialization
- vec_ vec_x(n);
+ vec_ dataset(n);
vec_ vec_rp; vec_rp.reserve(n);
urng_ urng;
norm_ norm;
gen_ gen(urng,norm);
std::generate_n(
- begin(vec_x),
+ begin(dataset),
n,
gen
);
- // Computes a density estimate for each x in vec_x using vec_x as sample
- BOOST_FOREACH(val_& x,vec_x){
+ // Computes a density estimate for each x in dataset
+ BOOST_FOREACH(val_& x,dataset){
val_ rp = for_each(
- begin(vec_x),
- end(vec_x),
+ boost::begin(dataset),
+ boost::end(dataset),
rp_visitor_(bandwidth,x)
).estimate();
vec_rp.push_back(rp);
}
- std::copy(
- begin(vec_rp),
- end(vec_rp),
- std::ostream_iterator<val_>(out," ")
- );
-
+ // Same as previous but calls estimator instead of for_each
typedef sub_range<vec_> sub_x_;
typedef
statistics::kernel::estimator<
@@ -75,16 +74,17 @@
gauss_k_
> estimator_;
estimator_ estimator(bandwidth);
- statistics::train(estimator,sub_x_(vec_x));
+ sub_x_ sub_x(dataset);
+ estimator.train(sub_x);
vec_ vec_rp2; vec_rp2.reserve(n);
- // Same as previous but calls estimator instead of for_each
for(unsigned i = 0; i<n; i++){
- val_ x = vec_x[i];
+ val_ x = dataset[i];
val_ rp = vec_rp[i];
- val_ rp2 = estimator(x).estimate();
+ val_ rp2 = estimator.predict(x);
BOOST_ASSERT(fabs(rp-rp2)<eps);
}
out << "<-" << std::endl;
+
}
\ No newline at end of file
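
In the scalar_rp.cpp example above, estimator.predict(x) replaces the earlier estimator(x).estimate() call and is expected to return the Rosenblatt-Parzen density at x. Below is a minimal standalone sketch of that quantity, assuming a Gaussian kernel and the usual 1/(n*h) normalization; rp_scalar_estimate is a hypothetical name, not the library API.

// Minimal sketch of a scalar Rosenblatt-Parzen density estimate.
// Standalone illustration only; not the sandbox API.
#include <cmath>
#include <cstddef>
#include <vector>

// p_hat(x) = (1/(n*h)) * sum_i K( (x - dataset[i]) / h ),  K = standard normal pdf
double rp_scalar_estimate(const std::vector<double>& dataset, double x, double h){
    const double c = 1.0 / std::sqrt(2.0 * std::acos(-1.0));
    double sum = 0.0;
    for(std::size_t i = 0; i < dataset.size(); ++i){
        double u = (x - dataset[i]) / h;
        sum += c * std::exp(-0.5 * u * u) / h;
    }
    return sum / dataset.size();
}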
Modified: sandbox/statistics/kernel/libs/statistics/kernel/src/main.cpp
==============================================================================
--- sandbox/statistics/kernel/libs/statistics/kernel/src/main.cpp (original)
+++ sandbox/statistics/kernel/libs/statistics/kernel/src/main.cpp 2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
@@ -11,12 +11,13 @@
#include <libs/statistics/kernel/example/kernel_mono_rp.h>
#include <libs/statistics/kernel/example/benchmark_scalar.h>
+
int main(){
- // example_scalar_rp(std::cout);
- // example_scalar_nw(std::cout);
- // example_kernel_mono_rp(std::cout);
- example_benchmark_scalar(std::cout);
+ example_scalar_rp(std::cout);
+ example_scalar_nw(std::cout);
+ example_kernel_mono_rp(std::cout);
+ //example_benchmark_scalar(std::cout);
return 0;
}
\ No newline at end of file