Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r55937 - in sandbox/statistics/empirical_cdf: . boost boost/statistics boost/statistics/empirical_cdf boost/statistics/empirical_cdf/algorithm libs libs/statistics libs/statistics/empirical_cdf libs/statistics/empirical_cdf/doc libs/statistics/empirical_cdf/example libs/statistics/empirical_cdf/src
From: erwann.rogard_at_[hidden]
Date: 2009-08-31 19:23:25


Author: e_r
Date: 2009-08-31 19:23:23 EDT (Mon, 31 Aug 2009)
New Revision: 55937
URL: http://svn.boost.org/trac/boost/changeset/55937

Log:
a
Added:
   sandbox/statistics/empirical_cdf/
   sandbox/statistics/empirical_cdf/boost/
   sandbox/statistics/empirical_cdf/boost/statistics/
   sandbox/statistics/empirical_cdf/boost/statistics/empirical_cdf/
   sandbox/statistics/empirical_cdf/boost/statistics/empirical_cdf/algorithm/
   sandbox/statistics/empirical_cdf/boost/statistics/empirical_cdf/algorithm/cdf_empirical_cdf_differences.hpp (contents, props changed)
   sandbox/statistics/empirical_cdf/boost/statistics/empirical_cdf/algorithm/kolmogorov_smirnov_distance.hpp (contents, props changed)
   sandbox/statistics/empirical_cdf/boost/statistics/empirical_cdf/algorithm/proportion_less_than.hpp (contents, props changed)
   sandbox/statistics/empirical_cdf/boost/statistics/empirical_cdf/algorithm/sequential_kolmogorov_smirnov_distance.hpp (contents, props changed)
   sandbox/statistics/empirical_cdf/libs/
   sandbox/statistics/empirical_cdf/libs/statistics/
   sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/
   sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/doc/
   sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/doc/readme.txt (contents, props changed)
   sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/example/
   sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/example/proportion_less_than.cpp (contents, props changed)
   sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/example/proportion_less_than.h (contents, props changed)
   sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/src/
   sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/src/main.cpp (contents, props changed)

Added: sandbox/statistics/empirical_cdf/boost/statistics/empirical_cdf/algorithm/cdf_empirical_cdf_differences.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/empirical_cdf/boost/statistics/empirical_cdf/algorithm/cdf_empirical_cdf_differences.hpp 2009-08-31 19:23:23 EDT (Mon, 31 Aug 2009)
@@ -0,0 +1,128 @@
+//////////////////////////////////////////////////////////////////////////////
+// empirical_cdf::algorithm::cdf_empirical_cdf_differences.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_EMPIRICAL_CDF_ALGORITHM_CDF_EMPIRICAL_CDF_DIFFERENCES_HPP_ER_2009
+#define BOOST_STATISTICS_EMPIRICAL_CDF_ALGORITHM_CDF_EMPIRICAL_CDF_DIFFERENCES_HPP_ER_2009
+#include <vector>
+#include <string>
+#include <stdexcept>
+#include <algorithm>
+#include <boost/type_traits/function_traits.hpp>
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/lambda.hpp>
+#include <ext/algorithm>
+#include <boost/format.hpp>
+#include <boost/range.hpp>
+#include <boost/bind.hpp>
+#include <boost/function.hpp>
+#include <boost/numeric/conversion/converter.hpp>
+#include <boost/iterator/counting_iterator.hpp>
+#include <boost/iterator/transform_iterator.hpp>
+#include <boost/iterator/iterator_traits.hpp>
+#include <boost/scalar_dist/fun_wrap/cdf.hpp>
+#include <boost/scalar_dist/meta/bind_delegate.hpp>
+#include <boost/scalar_dist/algorithm/transform.hpp>
+
+namespace boost{
+namespace statistics{
+namespace empirical_cdf{
+
+ // Writes { |i/n - F(x[i])| : i=0,...,n-1 } to out, where x is the sorted
+ // sample and n = distance(b_cdf,e_cdf); i/n plays the role of F_n(x[i]).
+ //
+ // Requirements:
+ // *[b_cdf,e_cdf) = {F(x[i]): x[i]<=x[i+1], i=0,...,n-1 }
+ template<typename InIt,typename OutIt>
+ OutIt cdf_empirical_cdf_differences(
+ InIt b_cdf, // first cdf value, F(x[0])
+ InIt e_cdf, // one past the last cdf value
+ OutIt out // receives n values; the iterator past the last one is returned
+ );
+
+ // Same as cdf_empirical_cdf_differences above, but takes the sample and
+ // computes the cdf of dist at each x[i] on the fly.
+ // Requirements (std::runtime_error is thrown when violated):
+ // *[b_x,e_x) = {x[i]: x[i]<=x[i+1], i=0,...,n-1 }
+ template<typename D,typename InIt,typename OutIt>
+ OutIt cdf_empirical_cdf_differences(
+ const D& dist, //e.g. normal_distribution<double>
+ InIt b_x, // first element of the sorted sample
+ InIt e_x, // one past the last element of the sorted sample
+ OutIt out // receives one difference per sample element
+ );
+
+ // Implementation //
+
+ template<typename InIt,typename OutIt>
+ OutIt
+ cdf_empirical_cdf_differences(
+ InIt b_cdf,
+ InIt e_cdf,
+ OutIt out
+ ){
+ typedef typename iterator_value<InIt>::type value_t;
+ typedef typename iterator_difference<InIt>::type diff_t;
+ typedef numeric::converter<value_t,unsigned> conv_; // index -> value_t
+ diff_t diff = std::distance(b_cdf,e_cdf);
+ value_t n = numeric::converter<value_t,diff_t>::convert(diff);
+ // Pairs each index i (_1) with the i-th cdf value c (_2), writes |i/n - c|.
+ return std::transform(
+ counting_iterator<unsigned>(0),
+ counting_iterator<unsigned>(static_cast<unsigned>(diff)),
+ b_cdf,
+ out,
+ // TODO lambda expression might be cleaner:
+ bind<value_t>(
+ &fabs,
+ bind<value_t>(
+ std::minus<value_t>(),
+ bind<value_t>(
+ std::divides<value_t>(),
+ bind<value_t>(
+ conv_(),
+ _1
+ ),
+ n
+ ),
+ _2
+ )
+ )
+ );
+ }
+
+ template<typename D,typename InIt,typename OutIt>
+ OutIt
+ cdf_empirical_cdf_differences(
+ const D& dist,
+ InIt b_x,
+ InIt e_x,
+ OutIt out
+ ){
+ // Standard-only sortedness test: fails on any adjacent pair x[i] > x[i+1].
+ typedef typename iterator_value<InIt>::type value_t;
+ if(std::adjacent_find(b_x,e_x,std::greater<value_t>()) != e_x){
+ throw std::runtime_error(
+ "cdf_empirical_cdf_differences : [b_x,e_x) not sorted"
+ );
+ }
+ typedef math::bind_delegate<D> meta_;
+ typedef typename math::bind_delegate<D>::type deleg_;
+ // d is applied to each x[i] below to obtain the cdf values lazily.
+ deleg_ d = meta_::template make<math::fun_wrap::cdf_>(dist);
+ return cdf_empirical_cdf_differences(
+ make_transform_iterator(b_x,d),
+ make_transform_iterator(e_x,d),
+ out
+ );
+ }
+
+}// empirical_cdf
+}// statistics
+}// boost
+
+#endif

Added: sandbox/statistics/empirical_cdf/boost/statistics/empirical_cdf/algorithm/kolmogorov_smirnov_distance.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/empirical_cdf/boost/statistics/empirical_cdf/algorithm/kolmogorov_smirnov_distance.hpp 2009-08-31 19:23:23 EDT (Mon, 31 Aug 2009)
@@ -0,0 +1,55 @@
+//////////////////////////////////////////////////////////////////////////////
+// empirical_cdf::algorithm::kolmogorov_smirnov_distance.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_EMPIRICAL_CDF_ALGORITHM_KOLMOGOROV_SMIRNOV_DISTANCE_HPP_ER_2009
+#define BOOST_STATISTICS_EMPIRICAL_CDF_ALGORITHM_KOLMOGOROV_SMIRNOV_DISTANCE_HPP_ER_2009
+#include <vector>
+#include <boost/range.hpp>
+#include <boost/iterator/iterator_traits.hpp>
+#include <boost/statistics/empirical_cdf/algorithm/sequential_kolmogorov_smirnov_distance.hpp>
+
+namespace boost{
+namespace statistics{
+namespace empirical_cdf{
+
+ // Returns the ks-distance between dist and the whole random sample *[b_x,e_x) (not necessarily sorted)
+ template<typename D,typename InIt>
+ typename iterator_value<InIt>::type
+ kolmogorov_smirnov_distance(
+ const D& dist, // e.g. D == normal_distribution<double>
+ InIt b_x, // first element of the sample
+ InIt e_x // one past the last element of the sample
+ );
+
+ // Implementation //
+
+ template<typename D,typename InIt>
+ typename iterator_value<InIt>::type
+ kolmogorov_smirnov_distance(
+ const D& dist,
+ InIt b_x,
+ InIt e_x
+ ){
+ // Delegates to the sequential version; the sample is copied and sorted
+ // there, so the caller's range is left untouched.
+ typedef typename iterator_value<InIt>::type value_type;
+ // Pre-sized to one slot; it keeps its initial value if nothing is written
+ // (the sequential algorithm returns at once on an empty sample).
+ std::vector<value_type> ks(1);
+ // k = 1: a single partition, i.e. the whole sample.
+ sequential_kolmogorov_smirnov_distance(
+ dist, b_x, e_x, 1, ks.begin()
+ );
+ return ks.front();
+ }
+
+}// empirical_cdf
+}// statistics
+}// boost
+
+#endif

Added: sandbox/statistics/empirical_cdf/boost/statistics/empirical_cdf/algorithm/proportion_less_than.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/empirical_cdf/boost/statistics/empirical_cdf/algorithm/proportion_less_than.hpp 2009-08-31 19:23:23 EDT (Mon, 31 Aug 2009)
@@ -0,0 +1,42 @@
+///////////////////////////////////////////////////////////////////////////////
+// empirical_cdf::algorithm::proportion_less_than.hpp //
+// //
+// Copyright 2009 Erwann Rogard. Distributed under the Boost //
+// Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_EMPIRICAL_CDF_ALGORITHM_PROPORTION_LESS_THAN_HPP_ER_2009
+#define BOOST_STATISTICS_EMPIRICAL_CDF_ALGORITHM_PROPORTION_LESS_THAN_HPP_ER_2009
+#include <numeric>
+#include <functional>
+#include <boost/lambda/lambda.hpp>
+#include <boost/iterator/iterator_traits.hpp>
+
+namespace boost{
+namespace statistics{
+namespace empirical_cdf{
+
+ // Returns the proportion of elements in *[b,e) whose value is less than x,
+ // or zero for an empty range. The ratio is formed in the value type, so
+ // an integral value type truncates it. Motivated by Cook-Gelman validation.
+ template<typename InIt>
+ typename iterator_value<InIt>::type
+ proportion_less_than(
+ InIt b,
+ InIt e,
+ typename iterator_value<InIt>::type x
+ ){
+ typedef typename iterator_value<InIt>::type value_type;
+ // Guard: with no elements m / n below would be 0 / 0.
+ if(b == e){ return value_type(); }
+ value_type m = static_cast<value_type>(
+ std::count_if(b, e, (lambda::_1<x))
+ );
+ value_type n = static_cast<value_type>(std::distance(b,e));
+ return m / n;
+ }
+
+}// empirical_cdf
+}// statistics
+}// boost
+#endif

Added: sandbox/statistics/empirical_cdf/boost/statistics/empirical_cdf/algorithm/sequential_kolmogorov_smirnov_distance.hpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/empirical_cdf/boost/statistics/empirical_cdf/algorithm/sequential_kolmogorov_smirnov_distance.hpp 2009-08-31 19:23:23 EDT (Mon, 31 Aug 2009)
@@ -0,0 +1,96 @@
+//////////////////////////////////////////////////////////////////////////////
+// empirical_cdf::algorithm::sequential_kolmogorov_smirnov_distance.hpp //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+#ifndef BOOST_STATISTICS_EMPIRICAL_CDF_ALGORITHM_SEQUENTIAL_KOLMOGOROV_SMIRNOV_DISTANCE_HPP_ER_2009
+#define BOOST_STATISTICS_EMPIRICAL_CDF_ALGORITHM_SEQUENTIAL_KOLMOGOROV_SMIRNOV_DISTANCE_HPP_ER_2009
+#include <vector>
+#include <string>
+#include <stdexcept>
+#include <algorithm>
+#include <boost/format.hpp>
+#include <boost/range.hpp>
+#include <boost/iterator/iterator_traits.hpp>
+
+#include <boost/statistics/empirical_cdf/algorithm/cdf_empirical_cdf_differences.hpp>
+
+namespace boost{
+namespace statistics{
+namespace empirical_cdf{
+
+ // Computes the ks-distance of each of the k nested prefixes of the sample,
+ // i = 0 i=1 i=k-1
+ // [x[0],x[m]), [x[0],x[2m]), ..., [x[0],x[n]), m = n/k
+ // and writes them, in that order, to i_o.
+ // Requirements:
+ // [b_x,e_x) is the empirical sample as originally drawn i.e. NOT SORTED
+ template<typename D,typename It,typename ItO>
+ ItO sequential_kolmogorov_smirnov_distance(
+ const D& dist, // distribution whose cdf the sample is compared with
+ It b_x, // first element of the sample
+ It e_x, // one past the last element of the sample
+ typename iterator_difference<It>::type k, // number of prefixes; std::runtime_error if n is not a multiple of k
+ ItO i_o // output; the iterator past the last written distance is returned
+ );
+
+ // Implementation //
+
+ template<typename D,typename It,typename ItO>
+ ItO sequential_kolmogorov_smirnov_distance(
+ const D& dist,
+ It b_x,
+ It e_x,
+ typename iterator_difference<It>::type k,
+ ItO i_o
+ ){
+ typedef typename iterator_difference<It>::type diff_;
+ typedef typename iterator_value<It>::type val_;
+ typedef std::vector<val_> vals_;
+ typedef typename range_iterator<vals_>::type it_val_;
+ // An empty sample yields no distance at all.
+ if( b_x == e_x ){ return i_o; }
+ diff_ diff = std::distance( b_x, e_x);
+ // k is tested first so that the modulo never divides by zero.
+ if(k <= 0 || diff % k != 0){
+ // Adjacent literals are concatenated at compile time, so no runtime
+ // copy into (read-only) literal storage is needed to build the text.
+ static const char* msg =
+ "sequential_kolmogorov_smirnov_distance : "
+ "diff = %1% not multiple of k = %2%.";
+ format f(msg); f % diff % k;
+ throw std::runtime_error(f.str());
+ }
+
+ vals_ cdfs(diff);
+ vals_ r_x; r_x.reserve(diff);
+ diff_ delta = diff / k;
+
+ // Since diff == k * delta, i_x lands exactly on e_x after k steps, so
+ // != is enough and does not require random access iterators.
+ It i_x = b_x;
+ while(i_x != e_x){
+ std::advance(i_x,delta);
+ // Work on a sorted copy of the prefix [b_x,i_x): the caller's
+ // sample keeps its original order.
+ r_x.clear();
+ std::copy(b_x,i_x,std::back_inserter(r_x));
+ std::sort(boost::begin(r_x),boost::end(r_x));
+ it_val_ e_cdf = cdf_empirical_cdf_differences(
+ dist,boost::begin(r_x),boost::end(r_x),boost::begin(cdfs)
+ );
+ // The distance is the largest of the differences just computed.
+ *i_o = *std::max_element(boost::begin(cdfs),e_cdf);
+ ++i_o;
+ }
+ return i_o;
+ }
+
+}// empirical_cdf
+}// statistics
+}// boost
+
+#endif

Added: sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/doc/readme.txt
==============================================================================
--- (empty file)
+++ sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/doc/readme.txt 2009-08-31 19:23:23 EDT (Mon, 31 Aug 2009)
@@ -0,0 +1,35 @@
+//////////////////////////////////////////////////////////////////////////////
+// empirical_cdf::doc::readme //
+// //
+// (C) Copyright 2009 Erwann Rogard //
+// Use, modification and distribution are subject to the //
+// Boost Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+//////////////////////////////////////////////////////////////////////////////
+
+[ Contact ]
+
+erwann.rogard_at_[hidden]
+
+[ Overview ]
+
+These are C++ algorithms that compute non-parametric statistics. For example,
+ kolmogorov_smirnov_distance(
+ dist,
+ b_x,
+ e_x
+ );
+
+[ Compiler ]
+
+gcc version i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1
+
+[ Dependencies ]
+
+boost_1_39_0
+
+[ History ]
+
+August 31st 2009 : renamed non_param to statistics::empirical_cdf
+July 2009 : Creation
+

Added: sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/example/proportion_less_than.cpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/example/proportion_less_than.cpp 2009-08-31 19:23:23 EDT (Mon, 31 Aug 2009)
@@ -0,0 +1,40 @@
+///////////////////////////////////////////////////////////////////////////////
+// empirical_cdf::example::algorithm::proportion_less_than.cpp //
+// //
+// Copyright 2009 Erwann Rogard. Distributed under the Boost //
+// Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+///////////////////////////////////////////////////////////////////////////////
+#include <vector>
+#include <boost/assign/std/vector.hpp>
+#include <boost/statistics/empirical_cdf/algorithm/proportion_less_than.hpp>
+#include <libs/statistics/empirical_cdf/example/proportion_less_than.h>
+
+void example_algorithm_proportion_less_than(std::ostream& out){
+ out << "-> example_algorithm_proportion_less_than : ";
+ using namespace boost;
+ using namespace statistics;
+
+ // Types (floating point: the proportion is returned in val_)
+ typedef double val_;
+ typedef std::vector<val_> vec_;
+
+ // Constants
+ const val_ j = 4;
+
+ // Initialization
+ vec_ vec;
+ {
+ using namespace assign;
+ vec += 0,1,2,3,4,5,6,7,8,9;
+ }
+
+ // Computations: share of the ten values that lie strictly below j
+ const val_ p = empirical_cdf::proportion_less_than(
+ boost::begin(vec),
+ boost::end(vec),
+ j
+ );
+ out << p; // report the result instead of discarding it
+ out << "<-" << std::endl;
+}
\ No newline at end of file

Added: sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/example/proportion_less_than.h
==============================================================================
--- (empty file)
+++ sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/example/proportion_less_than.h 2009-08-31 19:23:23 EDT (Mon, 31 Aug 2009)
@@ -0,0 +1,14 @@
+///////////////////////////////////////////////////////////////////////////////
+// empirical_cdf::example::algorithm::proportion_less_than.h //
+// //
+// Copyright 2009 Erwann Rogard. Distributed under the Boost //
+// Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef LIBS_STATISTICS_EMPIRICAL_CDF_EXAMPLE_ALGORITHM_PROPORTION_LESS_THAN_H_ER_2009
+#define LIBS_STATISTICS_EMPIRICAL_CDF_EXAMPLE_ALGORITHM_PROPORTION_LESS_THAN_H_ER_2009
+#include <ostream>
+
+void example_algorithm_proportion_less_than(std::ostream& out); // runs the proportion_less_than example, reporting to out
+
+#endif
\ No newline at end of file

Added: sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/src/main.cpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/empirical_cdf/libs/statistics/empirical_cdf/src/main.cpp 2009-08-31 19:23:23 EDT (Mon, 31 Aug 2009)
@@ -0,0 +1,18 @@
+///////////////////////////////////////////////////////////////////////////////
+// empirical_cdf::example::main.cpp //
+// //
+// Copyright 2009 Erwann Rogard. Distributed under the Boost //
+// Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+///////////////////////////////////////////////////////////////////////////////
+#include <iostream>
+#include <libs/statistics/empirical_cdf/example/proportion_less_than.h>
+
+int main(){
+ // Each example reports its progress on the stream it is given.
+ std::ostream& os = std::cout;
+ example_algorithm_proportion_less_than(os);
+
+ // See sandbox/dist_random for other examples.
+ return 0;
+}
\ No newline at end of file


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk