Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r62316 - sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/test
From: erwann.rogard_at_[hidden]
Date: 2010-05-30 12:05:21


Author: e_r
Date: 2010-05-30 12:05:19 EDT (Sun, 30 May 2010)
New Revision: 62316
URL: http://svn.boost.org/trac/boost/changeset/62316

Log:
m
Added:
   sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/test/
   sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/test/contingency_table1.cpp (contents, props changed)
   sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/test/contingency_table1.h (contents, props changed)
   sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/test/contingency_table2.cpp (contents, props changed)
   sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/test/contingency_table2.h (contents, props changed)

Added: sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/test/contingency_table1.cpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/test/contingency_table1.cpp 2010-05-30 12:05:19 EDT (Sun, 30 May 2010)
@@ -0,0 +1,96 @@
+///////////////////////////////////////////////////////////////////////////////
+// contingency_table1.cpp //
+// //
+// Copyright 2010 Erwann Rogard. Distributed under the Boost //
+// Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+///////////////////////////////////////////////////////////////////////////////
+#include <string>
+#include <boost/mpl/int.hpp>
+#include <boost/fusion/include/make_map.hpp>
+#include <boost/fusion/container/map/detail/sequence_to_map.hpp>
+#include <boost/mpl/vector/vector10.hpp>
+#include <boost/accumulators/framework/accumulator_set.hpp>
+#include <boost/accumulators/statistics/stats.hpp>
+#include <boost/assign/list_of.hpp>
+#include <boost/statistics/detail/non_parametric/contingency_table/include/pearson_chisq/independence.hpp>
+#include <boost/statistics/detail/non_parametric/contingency_table/include/factor.hpp>
+#include <boost/statistics/detail/non_parametric/contingency_table/include/cells.hpp>
+
+#include <libs/statistics/detail/non_parametric/test/contingency_table1.h>
+
+void test_contingency_table1()
+{
+    // Sets up a 2 x 3 table over keys (x, y), feeds two weighted samples, then checks levels, df and counts.
+    namespace ctab = boost::statistics::detail::contingency_table;
+    namespace chisq = ctab::pearson_chisq;
+
+    typedef double val_; // not referenced in this test
+    typedef boost::mpl::int_<0> key_x_; typedef int value_x_;
+    typedef boost::mpl::int_<1> key_y_; typedef std::string value_y_;
+    typedef boost::mpl::int_<2> key_z_; typedef int value_z_;
+
+    // Sample type obtained from the alternating (key, data type) sequence via sequence_to_map.
+    typedef boost::mpl::vector6<key_x_,value_x_,key_y_,value_y_,key_z_,value_z_> key_value_seq_;
+    typedef boost::fusion::detail::sequence_to_map<key_value_seq_>::type observation_;
+
+    typedef boost::mpl::vector2<key_x_,key_y_> xy_keys_;
+    typedef chisq::tag::independence<xy_keys_> indep_xy_;
+    typedef boost::accumulators::stats< indep_xy_ > features_;
+    typedef boost::accumulators::accumulator_set< observation_, features_, long int > table_;
+
+    using namespace boost::assign;
+    table_ table(( ctab::factor::_map_of_levels = boost::fusion::make_map<key_x_,key_y_>(
+        list_of(-1)(1), list_of("a")("b")("c") ) ));
+
+    {
+        // Levels: 2 for x and 3 for y, hence 2 * 3 cells; values never declared (2, "d") must be absent.
+        BOOST_ASSERT( ctab::cells::cells_count<xy_keys_>( table ) == 2 * 3 );
+        BOOST_ASSERT( ctab::factor::extract::levels<key_x_>( table ).count( -1 ) == 1 );
+        BOOST_ASSERT( ctab::factor::extract::levels<key_x_>( table ).count( 1 ) == 1 );
+        BOOST_ASSERT( ctab::factor::extract::levels<key_x_>( table ).count( 2 ) == 0 );
+        BOOST_ASSERT( ctab::factor::extract::levels<key_y_>( table ).count( "a" ) == 1 );
+        BOOST_ASSERT( ctab::factor::extract::levels<key_y_>( table ).count( "b" ) == 1 );
+        BOOST_ASSERT( ctab::factor::extract::levels<key_y_>( table ).count( "c" ) == 1 );
+        BOOST_ASSERT( ctab::factor::extract::levels<key_y_>( table ).count( "d" ) == 0 );
+    }
+    {
+        // Fill the table with two weighted observations. Expected running totals after each one:
+        //   columns:    total count | non-empty cells | x=-1 | x=1 | y="a" | y="b" | y="c"
+        //   after #1:        1      |        1        |  1   |  0  |   1   |   0   |   0
+        using namespace boost::accumulators; // makes the unqualified `weight` keyword visible
+        table( boost::fusion::make_map<key_x_,key_y_,key_z_>( -1, "a", 1 ), weight = 1 );
+        table( boost::fusion::make_map<key_x_,key_y_,key_z_>( 1, "b", 1 ), weight = 2 ); // after #2: 3 | 2 | 1 | 2 | 1 | 2 | 0
+    }
+    {
+        // Degrees of freedom with r = 2 levels of x and c = 3 levels of y:
+        //   lost df = (r-1) + (c-1) = 1 + 2 = 3;   df = r*c - r - c + 1 = 6 - 2 - 3 + 1 = 2
+        BOOST_ASSERT( chisq::lost_df( table, indep_xy_() ) == 3 );
+        BOOST_ASSERT( chisq::df( table, indep_xy_() ) == 2 );
+    }
+    {
+        // Marginal and joint cell counts, each read into its own named constant before being checked.
+        typedef boost::mpl::vector1<key_x_> only_x_;
+        typedef boost::mpl::vector1<key_y_> only_y_;
+        // Total accumulated weight: 1 + 2.
+        const std::size_t total = boost::accumulators::extract::weighted_count( table );
+        BOOST_ASSERT( total == 3 );
+        const std::size_t non_empty = ctab::cells::non_empty_cells_count<xy_keys_>( table );
+        BOOST_ASSERT( non_empty == 2 );
+        const std::size_t x_minus = ctab::cells::extract::cells<only_x_>( table )[ boost::fusion::make_map<key_x_>( -1 ) ];
+        BOOST_ASSERT( x_minus == 1 );
+        const std::size_t x_plus = ctab::cells::extract::cells<only_x_>( table )[ boost::fusion::make_map<key_x_>( 1 ) ];
+        BOOST_ASSERT( x_plus == 2 );
+        const std::size_t y_a = ctab::cells::extract::cells<only_y_>( table )[ boost::fusion::make_map<key_y_>( "a" ) ];
+        BOOST_ASSERT( y_a == 1 );
+        const std::size_t y_b = ctab::cells::extract::cells<only_y_>( table )[ boost::fusion::make_map<key_y_>( "b" ) ];
+        BOOST_ASSERT( y_b == 2 );
+        const std::size_t y_c = ctab::cells::extract::cells<only_y_>( table )[ boost::fusion::make_map<key_y_>( "c" ) ];
+        BOOST_ASSERT( y_c == 0 );
+        const std::size_t xy_minus_a = ctab::cells::extract::cells<xy_keys_>( table )[ boost::fusion::make_map<key_x_,key_y_,key_z_>( -1, "a", 1 ) ];
+        BOOST_ASSERT( xy_minus_a == 1 );
+        const std::size_t xy_plus_b = ctab::cells::extract::cells<xy_keys_>( table )[ boost::fusion::make_map<key_x_,key_y_,key_z_>( 1, "b", 1 ) ];
+        BOOST_ASSERT( xy_plus_b == 2 );
+    }
+
+}

Added: sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/test/contingency_table1.h
==============================================================================
--- (empty file)
+++ sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/test/contingency_table1.h 2010-05-30 12:05:19 EDT (Sun, 30 May 2010)
@@ -0,0 +1,13 @@
+///////////////////////////////////////////////////////////////////////////////
+// contingency_table1.h //
+// //
+// Copyright 2010 Erwann Rogard. Distributed under the Boost //
+// Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef LIBS_STATISTICS_DETAIL_NON_PARAMETRIC_TEST_CONTINGENCY_TABLE1_HPP_ER_2010
+#define LIBS_STATISTICS_DETAIL_NON_PARAMETRIC_TEST_CONTINGENCY_TABLE1_HPP_ER_2010
+// Entry point of the first contingency-table test; defined in contingency_table1.cpp.
+void test_contingency_table1();
+
+#endif // LIBS_STATISTICS_DETAIL_NON_PARAMETRIC_TEST_CONTINGENCY_TABLE1_HPP_ER_2010

Added: sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/test/contingency_table2.cpp
==============================================================================
--- (empty file)
+++ sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/test/contingency_table2.cpp 2010-05-30 12:05:19 EDT (Sun, 30 May 2010)
@@ -0,0 +1,217 @@
+///////////////////////////////////////////////////////////////////////////////
+// contingency_table2.cpp //
+// //
+// Copyright 2010 Erwann Rogard. Distributed under the Boost //
+// Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+///////////////////////////////////////////////////////////////////////////////
+#include <cmath>
+#include <string>
+
+#include <boost/mpl/int.hpp>
+#include <boost/mpl/vector/vector10.hpp>
+
+#include <boost/assign/list_of.hpp>
+
+#include <boost/fusion/container/map/detail/sequence_to_map.hpp>
+#include <boost/fusion/include/make_map.hpp>
+
+#include <boost/accumulators/statistics/stats.hpp>
+#include <boost/accumulators/framework/parameters/weight.hpp>
+#include <boost/accumulators/framework/accumulator_set.hpp>
+#include <boost/accumulators/statistics/detail/weighted_count.hpp>
+
+#include <boost/statistics/detail/non_parametric/contingency_table/include/factor.hpp>
+#include <boost/statistics/detail/non_parametric/contingency_table/include/pearson_chisq/independence.hpp>
+
+#include <libs/statistics/detail/non_parametric/test/contingency_table2.h>
+
+/*
+# Read the count data for this problem
+count<-scan()
+ 12 34 23 4 47 11
+ 35 31 11 34 10 18
+ 12 32 9 18 13 19
+ 12 12 14 9 33 25
+
+# Create factor tags:r=rows, c=columns, t=tiers
+r <- factor(gl(4, 2*3, 2*3*4, labels=c("r1","r2", "r3", "r4"))); r
+c <- factor(gl(3, 1, 2*3*4, labels=c("c1","c2", "c3"))); c
+t <- factor(gl(2, 3, 2*3*4, labels=c("t1","t2"))); t
+
+# Cross-tabulation of counts:
+xtabs(count~r+c+t) # all three factors
+xtabs(count~r+c) # RC: sum over tiers
+xtabs(count~r+t) # RT: sum over columns
+xtabs(count~c+t) # CT: sum over rows
+xtabs(count~r) # R: sum over columns and tiers
+xtabs(count~c) # C: sum over rows and tiers
+xtabs(count~t) # T: sum over rows and columns
+
+# 3-way Chi squared test of independence
+summary(xtabs(count~r+c+t))
+# 2-way Chi squared test of partial independence: R versus CT
+summary(xtabs(count~c+t))
+# 2-way Chi squared test of partial independence: C versus RT
+summary(xtabs(count~r+t))
+# 2-way Chi squared test of partial independence: T versus RC
+summary(xtabs(count~r+c))
+*/
+
+void test_contingency_table2()
+{
+ // Cross-checks pearson_chisq::df and ::statistic on a 4 x 3 x 2 table against the R output quoted in the comments.
+ namespace ct = boost::statistics::detail::contingency_table;
+ namespace pearson_chisq = ct::pearson_chisq;
+ using namespace boost::accumulators;
+
+ typedef double val_; // type in which the chi-square statistic is requested
+ typedef boost::mpl::int_<0> r_; typedef std::string data_r_;
+ typedef boost::mpl::int_<1> c_; typedef std::string data_c_;
+ typedef boost::mpl::int_<2> t_; typedef std::string data_t_;
+
+ typedef boost::fusion::detail::sequence_to_map<
+ boost::mpl::vector6<r_,data_r_,c_,data_c_,t_,data_t_>
+ >::type sample_;
+
+ typedef boost::mpl::vector3<r_,c_,t_> all_three_factors_;
+ typedef boost::mpl::vector2<r_,c_> sum_over_tiers_;
+ typedef boost::mpl::vector2<r_,t_> sum_over_cols_;
+ typedef boost::mpl::vector2<c_,t_> sum_over_rows_;
+
+ typedef ct::pearson_chisq::tag::independence<all_three_factors_> indep_r_c_t_;
+ typedef ct::pearson_chisq::tag::independence<sum_over_tiers_> indep_r_c_;
+ typedef ct::pearson_chisq::tag::independence<sum_over_cols_> indep_r_t_;
+ typedef ct::pearson_chisq::tag::independence<sum_over_rows_> indep_c_t_;
+
+ typedef boost::accumulators::stats<
+ indep_r_c_t_,
+ indep_r_c_,
+ indep_r_t_,
+ indep_c_t_
+ > stats_;
+ typedef boost::accumulators::accumulator_set< sample_, stats_, long int > acc_;
+
+ using namespace boost::assign;
+ acc_ acc(( ct::factor::_map_of_levels = boost::fusion::make_map<r_,c_,t_>(
+ list_of("r1")("r2")("r3")("r4"), list_of("c1")("c2")("c3"), list_of("t1")("t2") ) ));
+ // Select one make_map overload through a function pointer so the string literals below convert to std::string.
+ typedef boost::fusion::result_of::make_map<
+ r_,c_,t_,data_r_,data_c_,data_t_>::type result_of_make_map_;
+ typedef result_of_make_map_(*fp_)(const data_r_&,const data_c_&,const data_t_&);
+
+ fp_ make_sample = boost::fusion::make_map<r_,c_,t_>;
+ // One call per cell; the weight is that cell's count from the R `count` vector (the weights sum to 478).
+ acc( make_sample( "r1", "c1", "t1" ), weight = 12 );
+ acc( make_sample( "r1", "c1", "t2" ), weight = 4 );
+ acc( make_sample( "r1", "c2", "t1" ), weight = 34 );
+ acc( make_sample( "r1", "c2", "t2" ), weight = 47 );
+ acc( make_sample( "r1", "c3", "t1" ), weight = 23 );
+ acc( make_sample( "r1", "c3", "t2" ), weight = 11 );
+ acc( make_sample( "r2", "c1", "t1" ), weight = 35 );
+ acc( make_sample( "r2", "c1", "t2" ), weight = 34 );
+ acc( make_sample( "r2", "c2", "t1" ), weight = 31 );
+ acc( make_sample( "r2", "c2", "t2" ), weight = 10 );
+ acc( make_sample( "r2", "c3", "t1" ), weight = 11 );
+ acc( make_sample( "r2", "c3", "t2" ), weight = 18 );
+ acc( make_sample( "r3", "c1", "t1" ), weight = 12 );
+ acc( make_sample( "r3", "c1", "t2" ), weight = 18 );
+ acc( make_sample( "r3", "c2", "t1" ), weight = 32 );
+ acc( make_sample( "r3", "c2", "t2" ), weight = 13 );
+ acc( make_sample( "r3", "c3", "t1" ), weight = 9 );
+ acc( make_sample( "r3", "c3", "t2" ), weight = 19 );
+ acc( make_sample( "r4", "c1", "t1" ), weight = 12 );
+ acc( make_sample( "r4", "c1", "t2" ), weight = 9 );
+ acc( make_sample( "r4", "c2", "t1" ), weight = 12 );
+ acc( make_sample( "r4", "c2", "t2" ), weight = 33 );
+ acc( make_sample( "r4", "c3", "t1" ), weight = 14 );
+ acc( make_sample( "r4", "c3", "t2" ), weight = 25 );
+
+ using namespace std; // for the unqualified fabs calls below
+
+ val_ stat;
+ long df;
+ // Each check below: read df and the statistic for one hypothesis, then compare with the R figures.
+/*
+#### Output and interpretation (at significance level alpha=0.05):
+ # 3-way Chi squared test of independence
+ > summary(xtabs(count~r+c+t))
+ Call: xtabs(formula = count ~ r + c + t)
+ Number of cases in table: 478
+ Number of factors: 3
+ Test for independence of all factors:
+ Chisq = 102.17, df = 17, p-value = 3.514e-14
+#
+# ==> reject H0 in favor of
+# HA: r,c,t are NOT mutually independent
+#
+*/
+
+ // #include <boost/test/unit_test.hpp>
+ // #include <boost/test/floating_point_comparison.hpp>
+
+ df = pearson_chisq::df(acc,indep_r_c_t_());
+ stat = pearson_chisq::statistic<val_>( acc, indep_r_c_t_() );
+ BOOST_ASSERT( df == 17 );
+ BOOST_ASSERT( fabs(stat - 102.17) < 0.01 );
+
+/*
+ > # 2-way Chi squared test of partial independence: R versus CT
+ > summary(xtabs(count~c+t))
+ Call: xtabs(formula = count ~ c + t)
+ Number of cases in table: 478
+ Number of factors: 2
+ Test for independence of all factors:
+ Chisq = 2.3704, df = 2, p-value = 0.3057
+#
+# ==> do not reject H0: c,t are mutually independent
+#
+*/
+
+ df = pearson_chisq::df(acc,indep_c_t_());
+ stat = pearson_chisq::statistic<val_>( acc, indep_c_t_() );
+ BOOST_ASSERT( df == 2 );
+ BOOST_ASSERT( fabs( stat - 2.3704 ) < 0.001 );
+
+/*
+ >
+ > # 2-way Chi squared test of partial independence: C versus RT
+ > summary(xtabs(count~r+t))
+ Call: xtabs(formula = count ~ r + t)
+ Number of cases in table: 478
+ Number of factors: 2
+ Test for independence of all factors:
+ Chisq = 10.057, df = 3, p-value = 0.01809
+#
+# ==> reject H0 in favor of
+# HA: r,t are NOT mutually independent
+
+*/
+
+ df = pearson_chisq::df(acc,indep_r_t_());
+ stat = pearson_chisq::statistic<val_>( acc, indep_r_t_() );
+ BOOST_ASSERT( df == 3 );
+ BOOST_ASSERT( fabs( stat - 10.057 ) < 0.001 );
+
+/*
+ > # 2-way Chi squared test of partial independence: T versus RC
+ > summary(xtabs(count~r+c))
+ Call: xtabs(formula = count ~ r + c)
+ Number of cases in table: 478
+ Number of factors: 2
+ Test for independence of all factors:
+ Chisq = 58.67, df = 6, p-value = 8.363e-11
+
+#
+# ==> reject H0 in favor of
+# HA: r,c are NOT mutually independent
+#
+*/
+
+ df = pearson_chisq::df(acc,indep_r_c_());
+ stat = pearson_chisq::statistic<val_>( acc, indep_r_c_() );
+ BOOST_ASSERT( df == 6 );
+ BOOST_ASSERT( fabs( stat - 58.67 ) < 0.01 );
+
+}
+

Added: sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/test/contingency_table2.h
==============================================================================
--- (empty file)
+++ sandbox/statistics/non_parametric/libs/statistics/detail/non_parametric/test/contingency_table2.h 2010-05-30 12:05:19 EDT (Sun, 30 May 2010)
@@ -0,0 +1,13 @@
+///////////////////////////////////////////////////////////////////////////////
+// contingency_table2.h //
+// //
+// Copyright 2010 Erwann Rogard. Distributed under the Boost //
+// Software License, Version 1.0. (See accompanying file //
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef LIBS_STATISTICS_DETAIL_NON_PARAMETRIC_TEST_CONTINGENCY_TABLE2_HPP_ER_2010
+#define LIBS_STATISTICS_DETAIL_NON_PARAMETRIC_TEST_CONTINGENCY_TABLE2_HPP_ER_2010
+// Entry point of the second contingency-table test; defined in contingency_table2.cpp.
+void test_contingency_table2();
+
+#endif // LIBS_STATISTICS_DETAIL_NON_PARAMETRIC_TEST_CONTINGENCY_TABLE2_HPP_ER_2010


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk