Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r53462 - in trunk: boost/random libs/random
From: steven_at_[hidden]
Date: 2009-05-30 12:38:54


Author: steven_watanabe
Date: 2009-05-30 12:38:53 EDT (Sat, 30 May 2009)
New Revision: 53462
URL: http://svn.boost.org/trac/boost/changeset/53462

Log:
Fix the distribution of uniform_int.
Text files modified:
   trunk/boost/random/uniform_int.hpp | 134 ++++++++++++++++++++++++++++++++-------
   trunk/libs/random/random_test.cpp | 34 +++++++++
   2 files changed, 142 insertions(+), 26 deletions(-)

Modified: trunk/boost/random/uniform_int.hpp
==============================================================================
--- trunk/boost/random/uniform_int.hpp (original)
+++ trunk/boost/random/uniform_int.hpp 2009-05-30 12:38:53 EDT (Sat, 30 May 2009)
@@ -23,13 +23,9 @@
 #include <boost/limits.hpp>
 #include <boost/static_assert.hpp>
 #include <boost/detail/workaround.hpp>
-#include <boost/random/uniform_smallint.hpp>
 #include <boost/random/detail/config.hpp>
 #include <boost/random/detail/signed_unsigned_tools.hpp>
 #include <boost/type_traits/make_unsigned.hpp>
-#ifdef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS
-#include <boost/type_traits/is_float.hpp>
-#endif
 
 namespace boost {
 
@@ -119,45 +115,135 @@
       for(;;) {
         // concatenate several invocations of the base RNG
         // take extra care to avoid overflows
+
+ // limit == floor((range+1)/(brange+1))
+ // Therefore limit*(brange+1) <= range+1
         range_type limit;
         if(range == (std::numeric_limits<range_type>::max)()) {
           limit = range/(range_type(brange)+1);
- if(range % range_type(brange)+1 == range_type(brange))
+ if(range % (range_type(brange)+1) == range_type(brange))
             ++limit;
         } else {
           limit = (range+1)/(range_type(brange)+1);
         }
+
         // We consider "result" as expressed to base (brange+1):
         // For every power of (brange+1), we determine a random factor
         range_type result = range_type(0);
         range_type mult = range_type(1);
+
+ // loop invariants:
+ // result < mult
+ // mult <= range
         while(mult <= limit) {
+ // Postcondition: result <= range, thus no overflow
+ //
+ // limit*(brange+1)<=range+1 def. of limit (1)
+ // eng()-bmin<=brange eng() post. (2)
+ // and mult<=limit. loop condition (3)
+ // Therefore mult*(eng()-bmin+1)<=range+1 by (1),(2),(3) (4)
+ // Therefore mult*(eng()-bmin)+mult<=range+1 rearranging (4) (5)
+ // result<mult loop invariant (6)
+ // Therefore result+mult*(eng()-bmin)<range+1 by (5), (6) (7)
+ //
+ // Postcondition: result < mult*(brange+1)
+ //
+ // result<mult loop invariant (1)
+ // eng()-bmin<=brange eng() post. (2)
+ // Therefore result+mult*(eng()-bmin) <
+ // mult+mult*(eng()-bmin) by (1) (3)
+ // Therefore result+(eng()-bmin)*mult <
+ // mult+mult*brange by (2), (3) (4)
+ // Therefore result+(eng()-bmin)*mult <
+ // mult*(brange+1) by (4)
           result += random::detail::subtract<base_result>()(eng(), bmin) * mult;
+
+ // equivalent to (mult * (brange+1)) == range+1, but avoids overflow.
+ if(mult * range_type(brange) == range - mult + 1) {
+ // The destination range is an integer power of
+ // the generator's range.
+ return(result);
+ }
+
+ // Postcondition: mult <= range
+ //
+ // limit*(brange+1)<=range+1 def. of limit (1)
+ // mult<=limit loop condition (2)
+ // Therefore mult*(brange+1)<=range+1 by (1), (2) (3)
+ // mult*(brange+1)!=range+1 preceding if (4)
+ // Therefore mult*(brange+1)<range+1 by (3), (4) (5)
+ //
+ // Postcondition: result < mult
+ //
+ // See the second postcondition on the change to result.
           mult *= range_type(brange)+range_type(1);
         }
- if(mult == limit)
- // range+1 is an integer power of brange+1: no rejections required
- return result;
+ // loop postcondition: range/mult < brange+1
+ //
+ // mult > limit loop condition (1)
+ // Suppose range/mult >= brange+1 Assumption (2)
+ // range >= mult*(brange+1) by (2) (3)
+ // range+1 > mult*(brange+1) by (3) (4)
+ // range+1 > (limit+1)*(brange+1) by (1), (4) (5)
+ // (range+1)/(brange+1) > limit+1 by (5) (6)
+ // limit < floor((range+1)/(brange+1)) by (6) (7)
+ // limit==floor((range+1)/(brange+1)) def. of limit (8)
+ // not (2) reductio (9)
+ //
+ // loop postcondition: (range/mult)*mult+(mult-1) >= range
+ //
+ // (range/mult)*mult + range%mult == range identity (1)
+ // range%mult < mult def. of % (2)
+ // (range/mult)*mult+mult > range by (1), (2) (3)
+ // (range/mult)*mult+(mult-1) >= range by (3) (4)
+ //
+ // Note that the maximum value of result at this point is (mult-1),
+ // so after this final step, we generate numbers that can be
+ // at least as large as range. We have to really careful to avoid
+ // overflow in this final addition and in the rejection. Anything
+ // that overflows is larger than range and can thus be rejected.
+
         // range/mult < brange+1 -> no endless loop
- result += uniform_int<range_type>(0, range/mult)(eng) * mult;
- if(result <= range)
- return random::detail::add<range_type, result_type>()(result, min_value);
+ range_type result_increment = uniform_int<range_type>(0, range/mult)(eng);
+ if((std::numeric_limits<range_type>::max)() / mult < result_increment) {
+ // The multiplcation would overflow. Reject immediately.
+ continue;
+ }
+ result_increment *= mult;
+ // unsigned integers are guaranteed to wrap on overflow.
+ result += result_increment;
+ if(result < result_increment) {
+ // The addition overflowed. Reject.
+ continue;
+ }
+ if(result > range) {
+ // Too big. Reject.
+ continue;
+ }
+ return random::detail::add<range_type, result_type>()(result, min_value);
       }
     } else { // brange > range
- if(brange / range > 4 /* quantization_cutoff */ ) {
- // the new range is vastly smaller than the source range,
- // so quantization effects are not relevant
- return boost::uniform_smallint<result_type>(min_value, max_value)(eng);
- } else {
- // use rejection method to handle cases like 0..5 -> 0..4
- for(;;) {
- base_unsigned result =
- random::detail::subtract<base_result>()(eng(), bmin);
- // result and range are non-negative, and result is possibly larger
- // than range, so the cast is safe
- if(result <= static_cast<base_unsigned>(range))
- return random::detail::add<base_unsigned, result_type>()(result, min_value);
+ base_unsigned bucket_size;
+ // it's safe to add 1 to range, as long as we cast it first,
+ // because we know that it is less than brange. However,
+ // we do need to be careful not to cause overflow by adding 1
+ // to brange.
+ if(brange == (std::numeric_limits<base_unsigned>::max)()) {
+ bucket_size = brange / (static_cast<base_unsigned>(range)+1);
+ if(brange % (static_cast<base_unsigned>(range)+1) == static_cast<base_unsigned>(range)) {
+ ++bucket_size;
         }
+ } else {
+ bucket_size = (brange+1) / (static_cast<base_unsigned>(range)+1);
+ }
+ for(;;) {
+ base_unsigned result =
+ random::detail::subtract<base_result>()(eng(), bmin);
+ result /= bucket_size;
+ // result and range are non-negative, and result is possibly larger
+ // than range, so the cast is safe
+ if(result <= static_cast<base_unsigned>(range))
+ return random::detail::add<base_unsigned, result_type>()(result, min_value);
       }
     }
   }

Modified: trunk/libs/random/random_test.cpp
==============================================================================
--- trunk/libs/random/random_test.cpp (original)
+++ trunk/libs/random/random_test.cpp 2009-05-30 12:38:53 EDT (Sat, 30 May 2009)
@@ -64,7 +64,8 @@
   for(int k = 0; k < range; k++)
     sum += bucket[k];
   double avg = static_cast<double>(sum)/range;
- double threshold = 2*avg/std::sqrt(static_cast<double>(iter));
+ double p = 1 / static_cast<double>(range);
+ double threshold = 2*std::sqrt(static_cast<double>(iter)*p*(1-p));
   for(int i = 0; i < range; i++) {
     if(std::fabs(bucket[i] - avg) > threshold) {
       // 95% confidence interval
@@ -100,11 +101,37 @@
   // small range => larger range
   level_two uint05(uint12, int_gen(-3, 2));
   check_uniform_int(uint05, 100000);
+
+ // small range => larger range
+ level_two uint099(uint12, int_gen(0, 99));
+ check_uniform_int(uint099, 100000);
 
   // larger => small range, rejection case
   typedef boost::variate_generator<level_two&, int_gen> level_three;
   level_three uint1_4(uint05, int_gen(1, 4));
   check_uniform_int(uint1_4, 100000);
+
+ typedef boost::uniform_int<boost::uint8_t> int8_gen;
+ typedef boost::variate_generator<Generator&, int8_gen> gen8_t;
+
+ gen8_t gen8_03(gen, int8_gen(0, 3));
+
+ // use the full range of the type, where the destination
+ // range is a power of the source range
+ typedef boost::variate_generator<gen8_t, int8_gen> uniform_uint8;
+ uniform_uint8 uint8_0255(gen8_03, int8_gen(0, 255));
+ check_uniform_int(uint8_0255, 100000);
+
+ // use the full range, but a generator whose range is not
+ // a root of the destination range.
+ gen8_t gen8_02(gen, int8_gen(0, 2));
+ uniform_uint8 uint8_0255_2(gen8_02, int8_gen(0, 255));
+ check_uniform_int(uint8_0255_2, 100000);
+
+ // expand the range to a larger type.
+ typedef boost::variate_generator<gen8_t, int_gen> uniform_uint_from8;
+ uniform_uint_from8 uint0300(gen8_03, int_gen(0, 300));
+ check_uniform_int(uint0300, 100000);
 }
 
 #if defined(BOOST_MSVC) && _MSC_VER < 1300
@@ -140,10 +167,13 @@
 class ruetti_gen
 {
 public:
+ ruetti_gen() : state((max)() - 1) {}
   typedef boost::uint64_t result_type;
   result_type min BOOST_PREVENT_MACRO_SUBSTITUTION () const { return 0; }
   result_type max BOOST_PREVENT_MACRO_SUBSTITUTION () const { return std::numeric_limits<result_type>::max BOOST_PREVENT_MACRO_SUBSTITUTION (); }
- result_type operator()() { return (max)()-1; }
+ result_type operator()() { return state--; }
+private:
+ result_type state;
 };
 
 void test_overflow_range()


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk