Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r62352 - in sandbox/statistics/non_parametric: boost/statistics/detail/non_parametric/contingency_table/cells boost/statistics/detail/non_parametric/kolmogorov_smirnov libs/statistics/detail/non_parametric/doc
From: erwann.rogard_at_[hidden]
Date: 2010-05-31 14:57:00


Author: e_r
Date: 2010-05-31 14:56:59 EDT (Mon, 31 May 2010)
New Revision: 62352
URL: http://svn.boost.org/trac/boost/changeset/62352

Log:
m
Text files modified:
   sandbox/statistics/non_parametric/boost/statistics/detail/non_parametric/contingency_table/cells/cells.hpp | 21 +++++++--------
   sandbox/statistics/non_parametric/boost/statistics/detail/non_parametric/contingency_table/cells/count_matching.hpp | 1
   sandbox/statistics/non_parametric/boost/statistics/detail/non_parametric/kolmogorov_smirnov/statistic.hpp | 44 --------------------------------
   sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/doc/readme.txt | 54 ++++++++++++++++++++++++++++++++++++++++
   4 files changed, 64 insertions(+), 56 deletions(-)

Modified: sandbox/statistics/non_parametric/boost/statistics/detail/non_parametric/contingency_table/cells/cells.hpp
==============================================================================
--- sandbox/statistics/non_parametric/boost/statistics/detail/non_parametric/contingency_table/cells/cells.hpp (original)
+++ sandbox/statistics/non_parametric/boost/statistics/detail/non_parametric/contingency_table/cells/cells.hpp 2010-05-31 14:56:59 EDT (Mon, 31 May 2010)
@@ -94,14 +94,12 @@
         void operator()(const Args& args){
             namespace ns = boost::statistics::detail;
             typedef boost::math::policies::policy<> pol_;
- for(long int i = 0; i< args[boost::accumulators::weight]; i++)
- {
- this->update_if(
- hashable_subsample_( args[boost::accumulators::sample] ),
- args[ boost::accumulators::accumulator ],
- args[ ns::_policy | pol_() ]
- );
- }
+ this->update_if(
+ hashable_subsample_( args[boost::accumulators::sample] ),
+ args[ boost::accumulators::accumulator ],
+ args[ ns::_policy | pol_() ],
+ args[boost::accumulators::weight]
+ );
         }
                 
         result_type result(dont_care_)const{
@@ -109,11 +107,12 @@
         }
 
                 private:
- template<typename V,typename A,typename P>
+ template<typename V,typename A,typename P,typename N>
         void update_if(
             const V& s,
             const A& acc,
- const P& policy
+ const P& policy,
+ const N& size
         ){
             this->error_logger.reset();
             boost::fusion::for_each(
@@ -122,7 +121,7 @@
             );
             if(!this->error_logger.is_error())
             {
- ++( this->map )[ s ];
+ ( this->map )[ s ] += size;
             }else{
                 namespace ns = contingency_table;
                 static const char* fun = "impl::cells::update_if %1%";

Modified: sandbox/statistics/non_parametric/boost/statistics/detail/non_parametric/contingency_table/cells/count_matching.hpp
==============================================================================
--- sandbox/statistics/non_parametric/boost/statistics/detail/non_parametric/contingency_table/cells/count_matching.hpp (original)
+++ sandbox/statistics/non_parametric/boost/statistics/detail/non_parametric/contingency_table/cells/count_matching.hpp 2010-05-31 14:56:59 EDT (Mon, 31 May 2010)
@@ -10,7 +10,6 @@
 #include <boost/mpl/assert.hpp>
 #include <boost/type_traits/is_same.hpp>
 #include <boost/statistics/detail/non_parametric/contingency_table/cells/cells.hpp>
-#include <boost/statistics/detail/non_parametric/contingency_table/sample/view.hpp>
 #include <boost/mpl/detail/wrapper.hpp>
 
 namespace boost{

Modified: sandbox/statistics/non_parametric/boost/statistics/detail/non_parametric/kolmogorov_smirnov/statistic.hpp
==============================================================================
--- sandbox/statistics/non_parametric/boost/statistics/detail/non_parametric/kolmogorov_smirnov/statistic.hpp (original)
+++ sandbox/statistics/non_parametric/boost/statistics/detail/non_parametric/kolmogorov_smirnov/statistic.hpp 2010-05-31 14:56:59 EDT (Mon, 31 May 2010)
@@ -54,50 +54,6 @@
         template<typename Args>
         result_type result(dont_care_) const
         {
-/*
- typedef T1 val_;
- typedef boost::accumulators::tag::accumulator tag_acc_;
- typedef boost::accumulators::tag::count tag_n_;
- typedef boost::statistics::detail::empirical_distribution
- ::tag::ordered_sample tag_os_;
- typedef boost::statistics::detail::kolmogorov_smirnov
- ::tag::reference_distribution tag_dist_;
-
- typedef boost::parameter::binding<Args,tag_acc_> bind1_;
- typedef typename bind1_::type cref_acc_;
- typedef boost::parameter::binding<Args,tag_dist_> bind2_;
- typedef typename bind2_::type cref_dist_;
- typedef typename boost::accumulators::detail
- ::extractor_result<Args,tag_os_>::type ref_os_;
- typedef typename boost::remove_const< //in case ref changed to cref
- typename boost::remove_reference<
- ref_os_
- >::type
- >::type os_;
- typedef typename boost::range_reference<os_>::type ref_elem_;
-
- cref_acc_ acc = args[boost::accumulators::accumulator];
- cref_dist_ dist = args[
- boost::statistics::detail
- ::kolmogorov_smirnov::keyword::reference_distribution
- ];
- ref_os_ ref_os = boost::accumulators::extract_result<tag_os_>(acc);
-
- val_ m1 = static_cast<val_>(0);
- size_type i = 0;
- size_type n = boost::accumulators::extract_result<tag_n_>(acc);
-
- BOOST_FOREACH(ref_elem_ e,ref_os){
- i += e.second;
- val_ ecdf = static_cast<val_>(i) / static_cast<val_>(n);
- val_ true_cdf = cdf(dist,e.first);
- val_ m2
- = (true_cdf > ecdf)?(true_cdf - ecdf) : (ecdf - true_cdf);
- if(m2 > m1){ m1 = m2; }
- }
-
- return m1;
-*/
         }
         };
     

Modified: sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/doc/readme.txt
==============================================================================
--- sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/doc/readme.txt (original)
+++ sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/doc/readme.txt 2010-05-31 14:56:59 EDT (Mon, 31 May 2010)
@@ -21,3 +21,57 @@
 
     /boost_1_41_0
     /sandbox/statistics/support
+
+[ Contingency Table ]
+
+ Background:
+
+ http://en.wikipedia.org/wiki/Contingency_table
+ http://en.wikipedia.org/wiki/Pearson%27s_chi-square_test
+
+ A 2-way contingency table records the counts, in a sample, of each possible
+ outcome or "class" of a variable, Z = (X,Y), where X and Y are each a
+ categorical variable.
+
+ As given in this order, it is customary to call X and Y the row and column
+ variables, respectively.
+
+ A factor is the vector of categories, known as "levels", representing the
+ domain of a marginal variable, in our case, X and Y.
+
+ In a simple clinical trial, for example, we record X = 0 if a patient
+ receives the placebo, X = 1 if he/she receives the treatment, and Y = 0 or
+ Y = 1 if his/her condition deteriorated or improved, respectively.
+
+ Let j = 1,...,J index the classes of Z, each denoted z_j with probability
+ p_j, and recorded count and frequency, respectively, n_j and q_j. Let
+ n = {sum n_j : j=1,...,J} denote the sample size. Each (z_j,n_j) pair is
+ called a cell.
+
+ The above extends in an intuitive way to n-way tables.
+
+ A hypothesis about the distribution of Z is usually formulated as follows:
+
+ H0 = {p_j = k_j : j = 1,...,J}
+
+ Evidence against H0 is aggregated into a chi-square statistic:
+
+ X^2 = n * sum_j (q_j - k_j)^2 / k_j, where q_j = n_j / n
+
+ By statistical theory, if the sample is drawn under certain conditions of
+ randomness (iid), as n tends to infinity,
+
+ X^2 ~ Chi-squared(df)
+
+ where df is given by a formula that takes into account J, and H0.
+ Given alpha > 0, if X^2 > x such that Pr(Chi-squared(df)>x) = alpha, H0 is
+ rejected.
+
+ For example,
+ H0 = {independence between rows and columns}
+ is equivalent to
+ k_j = Pr(X=x)Pr(Y=y), for each j=(x,y). If Pr(X=x) and Pr(Y=y) are known, we
+ apply the above procedure directly. Otherwise, Pr(X=x) and Pr(Y=y) are
+ estimated from the table.
+
+


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk