Boost logo

Boost-Commit :

From: arseny.kapoulkine_at_[hidden]
Date: 2007-08-05 15:20:01


Author: zeux
Date: 2007-08-05 15:19:35 EDT (Sun, 05 Aug 2007)
New Revision: 38460
URL: http://svn.boost.org/trac/boost/changeset/38460

Log:
Removed FFT prototype, added FFT to library (all that's left is correctly choosing the algorithm (basecase/fft) based on input's length)
Added:
   sandbox/SOC/2007/bigint/boost/bigint/bigint_default_config.hpp (contents, props changed)
   sandbox/SOC/2007/bigint/boost/bigint/bigint_fft_multiplicator.hpp (contents, props changed)
   sandbox/SOC/2007/bigint/boost/bigint/fft_primes.hpp (contents, props changed)
   sandbox/SOC/2007/bigint/libs/bigint/src/
   sandbox/SOC/2007/bigint/libs/bigint/src/fft_primes.hpp (contents, props changed)
   sandbox/SOC/2007/bigint/libs/bigint/src/modmul_test.cpp (contents, props changed)
   sandbox/SOC/2007/bigint/libs/bigint/src/roots.cpp (contents, props changed)
Removed:
   sandbox/SOC/2007/bigint/fft/analyze.txt
   sandbox/SOC/2007/bigint/fft/main.cpp
   sandbox/SOC/2007/bigint/fft/primes.cpp
   sandbox/SOC/2007/bigint/fft/primes.h
   sandbox/SOC/2007/bigint/fft/primes_analyze.cpp
   sandbox/SOC/2007/bigint/fft/roots.cpp
Text files modified:
   sandbox/SOC/2007/bigint/boost/bigint/bigint_config.hpp | 3 ++
   sandbox/SOC/2007/bigint/boost/bigint/bigint_default.hpp | 60 +++++++++++++++++++++------------------
   2 files changed, 35 insertions(+), 28 deletions(-)

Modified: sandbox/SOC/2007/bigint/boost/bigint/bigint_config.hpp
==============================================================================
--- sandbox/SOC/2007/bigint/boost/bigint/bigint_config.hpp (original)
+++ sandbox/SOC/2007/bigint/boost/bigint/bigint_config.hpp 2007-08-05 15:19:35 EDT (Sun, 05 Aug 2007)
@@ -13,4 +13,7 @@
 // Define it if GMP implementation is present
 #define BOOST_BIGINT_HAS_GMP_SUPPORT
 
+// Define if platfom has native 64-bit integer support
+// #define BOOST_BIGINT_HAS_NATIVE_INT64
+
 #endif // BOOST_BIGINT_BIGINT_CONFIG_HPP

Modified: sandbox/SOC/2007/bigint/boost/bigint/bigint_default.hpp
==============================================================================
--- sandbox/SOC/2007/bigint/boost/bigint/bigint_default.hpp (original)
+++ sandbox/SOC/2007/bigint/boost/bigint/bigint_default.hpp 2007-08-05 15:19:35 EDT (Sun, 05 Aug 2007)
@@ -15,28 +15,11 @@
 #include <boost/scoped_array.hpp>
 
 #include <boost/bigint/bigint_util.hpp>
+#include <boost/bigint/bigint_default_config.hpp>
 
-namespace boost { namespace detail {
- template <size_t N> struct bigint_default_implementation_config;
-
- template <> struct bigint_default_implementation_config<8>
- {
- typedef boost::uint8_t first;
- typedef boost::uint16_t second;
- };
-
- template <> struct bigint_default_implementation_config<16>
- {
- typedef boost::uint16_t first;
- typedef boost::uint32_t second;
- };
-
- template <> struct bigint_default_implementation_config<32>
- {
- typedef boost::uint32_t first;
- typedef boost::uint64_t second;
- };
+#include <boost/bigint/bigint_fft_multiplicator.hpp>
 
+namespace boost { namespace detail {
         // Default implementation
         template <template <class> class Storage, size_t limb_bit_number = 32> struct bigint_default_implementation
         {
@@ -340,7 +323,7 @@
                                 // we borrowed 2^number of bits in our number - we have to subtract it
                                 // for this we need to complement all limbs to 2, and add 1 to the last limb.
                                 for (limb_t* j = data.begin(); j != data.end(); ++j)
- *j = limb_max()- *j;
+ *j = limb_max() - *j;
                         
                                 data[0]++;
                         }
@@ -384,14 +367,8 @@
                         }
                 }
 
- void mul(const bigint_default_implementation& lhs, const bigint_default_implementation& rhs)
+ void _mul_unsigned_basecase(const bigint_default_implementation& lhs, const bigint_default_implementation& rhs)
                 {
- if (lhs.is_zero() || rhs.is_zero())
- {
- assign(0);
- return;
- }
-
                         if (this == &lhs || this == &rhs)
                         {
                                 bigint_default_implementation copy;
@@ -431,6 +408,33 @@
                         }
 
                         _normalize();
+ }
+
+ void _mul_unsigned_fft(const bigint_default_implementation& lhs, const bigint_default_implementation& rhs)
+ {
+ size_t lhs_size = lhs.data.size();
+ size_t rhs_size = rhs.data.size();
+
+ data.resize(lhs_size + rhs_size);
+
+ if (&lhs != &rhs)
+ bigint_fft_multiplicator<limb_bit_number>::mul(data.begin(), lhs.data.begin(), lhs_size, rhs.data.begin(), rhs_size);
+ else
+ bigint_fft_multiplicator<limb_bit_number>::sqr(data.begin(), lhs.data.begin(), lhs_size);
+
+ _normalize();
+ }
+
+ void mul(const bigint_default_implementation& lhs, const bigint_default_implementation& rhs)
+ {
+ if (lhs.is_zero() || rhs.is_zero())
+ {
+ assign(0);
+ return;
+ }
+
+ // _mul_unsigned_basecase(lhs, rhs);
+ _mul_unsigned_fft(lhs, rhs);
                         
                         negative = lhs.negative ? !rhs.negative : rhs.negative;
                 }

Added: sandbox/SOC/2007/bigint/boost/bigint/bigint_default_config.hpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2007/bigint/boost/bigint/bigint_default_config.hpp 2007-08-05 15:19:35 EDT (Sun, 05 Aug 2007)
@@ -0,0 +1,37 @@
+/* Boost bigint_default_config.hpp header file
+ *
+ * Copyright 2007 Arseny Kapoulkine
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or
+ * copy at http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#ifndef BOOST_BIGINT_BIGINT_DEFAULT_CONFIG_HPP
+#define BOOST_BIGINT_BIGINT_DEFAULT_CONFIG_HPP
+
+#include <boost/cstdint.hpp>
+
+namespace boost { namespace detail {
+ template <size_t N> struct bigint_default_implementation_config;
+
+ template <> struct bigint_default_implementation_config<8>
+ {
+ typedef boost::uint8_t first;
+ typedef boost::uint16_t second;
+ };
+
+ template <> struct bigint_default_implementation_config<16>
+ {
+ typedef boost::uint16_t first;
+ typedef boost::uint32_t second;
+ };
+
+ template <> struct bigint_default_implementation_config<32>
+ {
+ typedef boost::uint32_t first;
+ typedef boost::uint64_t second;
+ };
+} } // namespace boost::detail
+
+#endif // BOOST_BIGINT_BIGINT_DEFAULT_CONFIG_HPP

Added: sandbox/SOC/2007/bigint/boost/bigint/bigint_fft_multiplicator.hpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2007/bigint/boost/bigint/bigint_fft_multiplicator.hpp 2007-08-05 15:19:35 EDT (Sun, 05 Aug 2007)
@@ -0,0 +1,417 @@
+/* Boost bigint_fft_multiplicator.hpp header file
+ *
+ * Copyright 2007 Arseny Kapoulkine
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or
+ * copy at http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#ifndef BOOST_BIGINT_BIGINT_FFT_MULTIPLICATOR_HPP
+#define BOOST_BIGINT_BIGINT_FFT_MULTIPLICATOR_HPP
+
+#include <exception>
+#include <algorithm>
+
+#include <boost/cstdint.hpp>
+#include <boost/bigint/bigint_default_config.hpp>
+
+namespace boost { namespace detail {
+
+#include "fft_primes.hpp"
+
+static const size_t fft_iterative_threshold = 512 * 1024 / sizeof(uint32_t); // assuming 512 Kb L2 cache usage
+
+template <size_t limb_bit_number = 32> struct bigint_fft_multiplicator
+{
+ typedef typename bigint_default_implementation_config<limb_bit_number>::first limb_t;
+ typedef typename bigint_default_implementation_config<limb_bit_number>::second limb2_t;
+
+ static uint32_t modmul(uint32_t a, uint32_t b, uint32_t prime, double inv_prime)
+ {
+ #ifdef BOOST_BIGINT_HAS_NATIVE_INT64
+ return static_cast<uint32_t>(static_cast<uint64_t>(a) * b % prime);
+ #else
+ int r = a * b - prime * static_cast<uint32_t>(inv_prime * static_cast<int>(a) * b);
+ r = (r < 0 ? r + prime : r);
+ return r;
+ #endif
+}
+
+ static uint32_t modadd(uint32_t a, uint32_t b, uint32_t prime)
+ {
+ uint32_t r = a + b;
+ return (r >= prime ? r - prime : r);
+ }
+
+ static uint32_t modsub(uint32_t a, uint32_t b, uint32_t prime)
+ {
+ int r = static_cast<int>(a) - static_cast<int>(b);
+ return (r < 0 ? r + prime : r);
+ }
+
+ static size_t log2(size_t value)
+ {
+ size_t r = 0;
+
+ while (value > 1)
+ {
+ ++r;
+ value >>= 1;
+ }
+
+ return r;
+ }
+
+ // reverse(x) -> reverse(x+1)
+ static size_t rev_next(size_t x, size_t n)
+ {
+ do
+ {
+ n >>= 1;
+ x ^= n;
+ }
+ while ((x & n) == 0);
+
+ return x;
+ }
+
+ // swaps values with indices that have reversed bit representation (data[1100b] <-> data[0011b])
+ template <typename T> static void fft_reorder(T* data, size_t size)
+ {
+ if (size <= 2) return;
+
+ size_t r = 0;
+
+ for (size_t x = 1; x < size; ++x)
+ {
+ r = rev_next(r, size);
+
+ if (r > x) std::swap(data[x], data[r]);
+ }
+ }
+
+ static void fft_iterative(uint32_t* data, size_t size, uint32_t prime, double inv_prime, const uint32_t* roots)
+ {
+ size_t step = 1;
+
+ uint32_t nstep = 0;
+
+ while (step < size)
+ {
+ uint32_t root = roots[++nstep];
+
+ size_t half_step = step;
+ step *= 2;
+
+ uint32_t r = 1;
+
+ for (size_t j = 0; j < half_step; ++j)
+ {
+ for (size_t i = j; i < size; i += step)
+ {
+ uint32_t a = data[i];
+ uint32_t b = modmul(data[i + half_step], r, prime, inv_prime);
+
+ data[i] = modadd(a, b, prime);
+ data[i + half_step] = modsub(a, b, prime);
+ }
+
+ r = modmul(r, root, prime, inv_prime);
+ }
+ }
+ }
+
+ static void fft_recursive(uint32_t* data, size_t size, uint32_t prime, double inv_prime, const uint32_t* roots_start, const uint32_t* roots)
+ {
+ if (size <= fft_iterative_threshold)
+ {
+ return fft_iterative(data, size, prime, inv_prime, roots_start);
+ }
+
+ size_t half_size = size / 2;
+
+ if (half_size > 1)
+ {
+ fft_recursive(data, half_size, prime, inv_prime, roots_start, roots - 1);
+ fft_recursive(data + half_size, half_size, prime, inv_prime, roots_start, roots - 1);
+ }
+
+ uint32_t root = *roots;
+ uint32_t r = 1;
+
+ for (size_t i = 0; i < half_size; ++i)
+ {
+ uint32_t a = data[i];
+ uint32_t b = modmul(data[i + half_size], r, prime, inv_prime);
+
+ data[i] = modadd(a, b, prime);
+ data[i + half_size] = modsub(a, b, prime);
+
+ r = modmul(r, root, prime, inv_prime);
+ }
+ }
+
+ static void dft(uint32_t* dest, const uint16_t* source, size_t N, size_t log2N, uint32_t prime, const uint32_t* root_table)
+ {
+ const uint16_t* source_end = source + N;
+
+ for (uint32_t* di = dest; source != source_end; ++di, ++source)
+ *di = *source;
+
+ fft_recursive(dest, N, prime, 1.0 / prime, root_table, root_table + log2N);
+ }
+
+ static void ift(uint32_t* data, size_t N, size_t log2N, uint32_t prime, const uint32_t* root_table, uint32_t inv_N)
+ {
+ double inv_prime = 1.0 / prime;
+
+ fft_reorder(data, N);
+ fft_recursive(data, N, prime, inv_prime, root_table, root_table + log2N);
+
+ for (size_t i = 0; i < N; ++i)
+ {
+ data[i] = modmul(data[i], inv_N, prime, inv_prime);
+ }
+ }
+
+ // CRT:
+
+ // 64 x == 32 c0 mod 32 p0
+ // 64 x == 32 c1 mod 32 p1
+
+ // x == p0*t + c0
+ // p0*t + c0 == c1 (mod p1)
+ // t == (c1 - c0) div p0 (mod p1)
+
+ // t == t0 (mod p1)
+
+ // x == p0 * t0 + c0
+ // t0 == (c1 - c0) div p0 (mod p1)
+
+ static uint64_t fft_crt(uint32_t c0, uint32_t c1, double inv_prime_1)
+ {
+ uint32_t t0 = modmul(modsub(c1, c0, fft_primes[1]), fft_inv_p0_mod_p1, fft_primes[1], inv_prime_1);
+ return static_cast<uint64_t>(fft_primes[0]) * t0 + c0;
+ }
+
+ static void fft_crt_carry(uint8_t* dest, size_t dest_size, const uint32_t* conv0, const uint32_t* conv1)
+ {
+ double inv_prime_1 = 1.0 / fft_primes[1];
+
+ uint64_t carry = 0;
+
+ uint8_t* dest_end = dest + dest_size;
+
+ if (dest_size % 2 != 0) --dest_end;
+
+ size_t i;
+
+ for (i = 0; dest != dest_end; ++i)
+ {
+ carry += fft_crt(conv0[i], conv1[i], inv_prime_1);
+
+ *dest++ = static_cast<uint8_t>(carry & 0xff);
+ *dest++ = static_cast<uint8_t>(static_cast<uint16_t>(carry & 0xff00) >> 8);
+ carry >>= 16;
+ }
+
+ if (dest_size % 2 != 0)
+ {
+ carry += fft_crt(conv0[i], conv1[i], inv_prime_1);
+
+ *dest++ = static_cast<uint8_t>(carry & 0xff);
+ }
+ }
+
+ static void fft_crt_carry(uint16_t* dest, size_t dest_size, const uint32_t* conv0, const uint32_t* conv1)
+ {
+ double inv_prime_1 = 1.0 / fft_primes[1];
+
+ uint64_t carry = 0;
+
+ uint16_t* dest_end = dest + dest_size;
+
+ for (size_t i = 0; dest != dest_end; ++i)
+ {
+ carry += fft_crt(conv0[i], conv1[i], inv_prime_1);
+
+ *dest++ = static_cast<uint16_t>(carry & 0xffff);
+ carry >>= 16;
+ }
+ }
+
+ static void fft_crt_carry(uint32_t* dest, size_t dest_size, const uint32_t* conv0, const uint32_t* conv1)
+ {
+ double inv_prime_1 = 1.0 / fft_primes[1];
+
+ uint64_t carry = 0;
+
+ uint32_t* dest_end = dest + dest_size;
+
+ for (size_t i = 0; dest != dest_end; i += 2)
+ {
+ uint32_t l;
+
+ carry += fft_crt(conv0[i], conv1[i], inv_prime_1);
+ l = static_cast<limb_t>(carry & 0xffff);
+ carry >>= 16;
+
+ carry += fft_crt(conv0[i+1], conv1[i+1], inv_prime_1);
+ *dest++ = (static_cast<limb_t>(carry & 0xffff) << 16) + l;
+ carry >>= 16;
+ }
+ }
+
+ static void fft_copy_source(uint16_t* dest, size_t N, const uint8_t* source, size_t source_size)
+ {
+ uint16_t* di = dest;
+
+ const uint8_t* source_end = source + source_size;
+
+ if (source_size % 2 != 0) --source_end;
+
+ for (const uint8_t* i = source; i != source_end; i += 2)
+ {
+ *di++ = static_cast<uint16_t>(*i + (*(i+1) << 8));
+ }
+
+ if (source_size % 2 != 0)
+ *di++ = *source_end;
+
+ memset(di, 0, (dest + N - di) * sizeof(uint16_t));
+
+ fft_reorder(dest, N);
+ }
+
+ static void fft_copy_source(uint16_t* dest, size_t N, const uint16_t* source, size_t source_size)
+ {
+ memcpy(dest, source, source_size * sizeof(uint16_t));
+ memset(dest + source_size, 0, (N - source_size) * sizeof(uint16_t));
+ fft_reorder(dest, N);
+ }
+
+ static void fft_copy_source(uint16_t* dest, size_t N, const uint32_t* source, size_t source_size)
+ {
+ uint16_t* di = dest;
+
+ const uint32_t* source_end = source + source_size;
+
+ for (const uint32_t* i = source; i != source_end; ++i)
+ {
+ *di++ = static_cast<uint16_t>(*i & 0xffff);
+ *di++ = static_cast<uint16_t>(*i >> 16);
+ }
+
+ memset(di, 0, (dest + N - di) * sizeof(uint16_t));
+
+ fft_reorder(dest, N);
+ }
+
+ static void mul(limb_t* dest, const limb_t* a, size_t a_size, const limb_t* b, size_t b_size)
+ {
+ size_t max_size = (std::max)(a_size, b_size);
+
+ // round up to the nearest power of two
+ size_t N = 1;
+ while (N < max_size) N *= 2;
+
+ // fix N depending on limb_type
+ N = N * sizeof(limb_t) / sizeof(uint16_t);
+
+ // destination size
+ N *= 2;
+
+ // can we perform FFT?
+ if (N > fft_max_N) throw std::bad_alloc();
+
+ size_t log2N = log2(N);
+
+ uint32_t* workspace = new uint32_t[4*N];
+
+ uint32_t* convs[] = {workspace, workspace + N};
+ uint32_t* fft = workspace + 2*N;
+
+ uint16_t* source_workspace = reinterpret_cast<uint16_t*>(workspace + 3*N);
+
+ uint16_t* source_a = source_workspace;
+ uint16_t* source_b = source_workspace + N;
+
+ fft_copy_source(source_a, N, a, a_size);
+ fft_copy_source(source_b, N, b, b_size);
+
+ for (int p = 0; p < 2; ++p)
+ {
+ uint32_t prime = fft_primes[p];
+ double inv_prime = 1.0 / prime;
+
+ uint32_t* fft_a = fft;
+ uint32_t* fft_b = convs[p];
+
+ dft(fft_a, source_a, N, log2N, prime, fft_primitive_roots[p]);
+ dft(fft_b, source_b, N, log2N, prime, fft_primitive_roots[p]);
+
+ for (size_t i = 0; i < N; ++i)
+ {
+ fft_b[i] = modmul(fft_a[i], fft_b[i], prime, inv_prime);
+ }
+
+ ift(fft_b, N, log2N, prime, fft_inv_primitive_roots[p], fft_inv_N[p][log2N]);
+ }
+
+ fft_crt_carry(dest, a_size + b_size, convs[0], convs[1]);
+
+ delete[] workspace;
+ }
+
+ static void sqr(limb_t* dest, const limb_t* a, size_t a_size)
+ {
+ // round up to the nearest power of two
+ size_t N = 1;
+ while (N < a_size) N *= 2;
+
+ // fix N depending on limb_type
+ N = N * sizeof(limb_t) / sizeof(uint16_t);
+
+ // destination size
+ N *= 2;
+
+ // can we perform FFT?
+ if (N > fft_max_N) throw std::bad_alloc();
+
+ size_t log2N = log2(N);
+
+ uint32_t* workspace = new uint32_t[2*N + N/2];
+
+ uint32_t* convs[] = {workspace, workspace + N};
+
+ uint16_t* source = reinterpret_cast<uint16_t*>(workspace + 2*N);
+
+ fft_copy_source(source, N, a, a_size);
+
+ for (int p = 0; p < 2; ++p)
+ {
+ uint32_t prime = fft_primes[p];
+ double inv_prime = 1.0 / prime;
+
+ uint32_t* fft = convs[p];
+
+ dft(fft, source, N, log2N, prime, fft_primitive_roots[p]);
+
+ for (size_t i = 0; i < N; ++i)
+ {
+ fft[i] = modmul(fft[i], fft[i], prime, inv_prime);
+ }
+
+ ift(fft, N, log2N, prime, fft_inv_primitive_roots[p], fft_inv_N[p][log2N]);
+ }
+
+ fft_crt_carry(dest, a_size * 2, convs[0], convs[1]);
+
+ delete[] workspace;
+ }
+};
+
+} }
+
+#endif // BOOST_BIGINT_BIGINT_FFT_MULTIPLICATOR_HPP

Added: sandbox/SOC/2007/bigint/boost/bigint/fft_primes.hpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2007/bigint/boost/bigint/fft_primes.hpp 2007-08-05 15:19:35 EDT (Sun, 05 Aug 2007)
@@ -0,0 +1,27 @@
+static const uint32_t fft_primes[] = {1811939329, 2013265921};
+
+// 1811939329 = 27 * 2^26 + 1
+// 2013265921 = 15 * 2^27 + 1
+// fft_max_N * base^2 should be less than 1811939329 * 2013265921 = 3647915701995307009 (base = 65536)
+// fft_max_N should be greater or equal than 2^26 = 67108864 due to primes structure
+const size_t fft_max_N = 67108864;
+
+static const uint32_t fft_primitive_roots[2][27] =
+{
+ {1, 1811939328, 394989905, 55917252, 1118888139, 1272218801, 806334172, 436377801, 1023662952, 966955881, 581065507, 1672458129, 1098404717, 963626781, 1186399201, 1597927735, 392626835, 468971248, 1794545288, 1542029237, 1150935898, 1350666604, 1610410665, 1160146422, 342102016, 18496, 136},
+ {1, 2013265920, 284861408, 1801542727, 1446056615, 1399190761, 939236864, 521584899, 46459047, 1260838569, 1313302657, 638351792, 368514500, 1509407754, 1048643618, 1815937125, 451926729, 45920005, 1463305339, 775491602, 1343975138, 728494330, 1000080393, 1478531143, 7311616, 2704, 52}
+};
+
+static const uint32_t fft_inv_primitive_roots[2][27] =
+{
+ {1, 1811939328, 1416949424, 359621496, 1134583214, 281735358, 162591522, 1278842376, 499483227, 1150251226, 1286694572, 1182052503, 671235027, 385336276, 210810912, 1762190734, 88500015, 925755953, 772591754, 451751135, 777519015, 1770749423, 1487423663, 1759406041, 57989169, 1711526385, 839354248},
+ {1, 2013265920, 1728404513, 1592366214, 1816869661, 1123955347, 1326890868, 1192577619, 1311163986, 1959689139, 448785913, 547291506, 903929129, 1482477510, 1133599127, 945906203, 1921914948, 1722814814, 388124777, 1911029444, 1894324124, 1797002609, 452573369, 1816169837, 838408214, 118383610, 116149957}
+};
+
+static const uint32_t fft_inv_N[2][27] =
+{
+ {1, 905969665, 1358954497, 1585446913, 1698693121, 1755316225, 1783627777, 1797783553, 1804861441, 1808400385, 1810169857, 1811054593, 1811496961, 1811718145, 1811828737, 1811884033, 1811911681, 1811925505, 1811932417, 1811935873, 1811937601, 1811938465, 1811938897, 1811939113, 1811939221, 1811939275, 1811939302},
+ {1, 1006632961, 1509949441, 1761607681, 1887436801, 1950351361, 1981808641, 1997537281, 2005401601, 2009333761, 2011299841, 2012282881, 2012774401, 2013020161, 2013143041, 2013204481, 2013235201, 2013250561, 2013258241, 2013262081, 2013264001, 2013264961, 2013265441, 2013265681, 2013265801, 2013265861, 2013265891}
+};
+
+const uint32_t fft_inv_p0_mod_p1 = 10;

Deleted: sandbox/SOC/2007/bigint/fft/analyze.txt
==============================================================================
--- sandbox/SOC/2007/bigint/fft/analyze.txt 2007-08-05 15:19:35 EDT (Sun, 05 Aug 2007)
+++ (empty file)
@@ -1,13 +0,0 @@
-Yo!
-0: 7 13 25 27 67 97 115 135 141 151 177 195 211 235 277 291 297 315 331 343 345 381 391 427 447 471 507 555 577 625 685 711 735 763 765 781 795 811 823 843 865 871 877 883 895 897 913 927 931 961 997 1003 1005 1035 1045 1051 1075 1081 1093 1105 1131 1137 1155 1203 1227 1255 1303 1317 1333 1345 1347 1365 1371 1411 1417 1423 1435 1443 1465 1483 1521 1527 1533 1555 1563 1567 1575 1617 1621 1645 1777 1801 1807 1833 1855 1863 1873 1887 1941 1953 1975 1981 2017 2055 2071 2083 2097 2101 2115 2121 2163 2167 2193 2197 2215 2241 2253 2277 2307 2335 2361 2395 2403 2473 2487 2523 2527 2533 2593 2601 2607 2613 2691 2697 2713 2743 2773 2775 2781 2817 2841 2863 2895 2937 2941 2943 2965 2967 2971 2977 2995 3045 3063 3067 3075 3105 3133 3151 3153 3165 3171 3177 3181 3205 3271 3283 3291 3315 3363 3385 3417 3445 3447 3501 3523 3535 3553 3565 3583 3591 3597 3627 3667 3685 3693 3735 3777 3787 3817 3885 3907 3955 3961 3997 4005 4015 4035 4045 4063 4095
-1: 11 33 39 53 63 65 81 89 95 101 119 123 129 131 183 185 219 221 275 285 303 305 309 339 341 353 375 381 393 429 441 453 459 465 479 483 521 543 549 575 579 581 611 623 633 651 675 735 743 749 789 815 875 891 903 921 953 999 1001 1041 1043 1053 1055 1059 1065 1073 1095 1103 1169 1179 1275 1319 1361 1389 1395 1415 1421 1431 1433 1485 1509 1515 1533 1551 1563 1593 1601 1625 1629 1653 1659 1661 1673 1715 1733 1775 1779 1821 1859 1871 1961 1995 2009 2013 2039
-2: 25 27 33 37 39 55 99 159 163 219 223 225 235 277 289 315 343 375 427 445 483 513 517 553 559 565 573 589 627 637 639 645 649 655 663 669 715 733 757 765 819 823 837 847 853 859 865 879 915 939 957 979
-3: 45 71 77 105 107 119 155 177 249 305 335 347 357 371 395 407 431 437 447 507
-4: 45 73 127 151 157 171 193 235 243
-5: 5 33 51 63 81 125
-6: 7 27 37 43
-7: 15 17 29
-8: 13
-9:
-10: 3
-11:

Deleted: sandbox/SOC/2007/bigint/fft/main.cpp
==============================================================================
--- sandbox/SOC/2007/bigint/fft/main.cpp 2007-08-05 15:19:35 EDT (Sun, 05 Aug 2007)
+++ (empty file)
@@ -1,380 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <windows.h>
-
-#include "primes.h"
-
-template <typename T> void P(const char* text, const T* data, size_t size)
-{
- std::cout << text;
- for (size_t i = 0; i < size; ++i) std::cout << " " << std::dec << data[i] << std::dec;
- std::cout << std::endl;
-}
-
-template <typename T> void S(const char* file, const T* data, size_t size)
-{
- std::ofstream out(file);
- for (size_t i = 0; i < size; ++i) out << std::hex << data[i] << std::dec;
-}
-
-unsigned int modmul(unsigned int a, unsigned int b, unsigned int prime)
-{
- return static_cast<unsigned int>(static_cast<unsigned __int64>(a) * b % prime);
-}
-
-unsigned int modadd(unsigned int a, unsigned int b, unsigned int prime)
-{
- return (a + b) % prime;
-}
-
-unsigned int modsub(unsigned int a, unsigned int b, unsigned int prime)
-{
- return (a + prime - b) % prime;
-}
-
-unsigned int log2(unsigned int value)
-{
- unsigned int r = 0;
-
- while (value > 1)
- {
- ++r;
- value >>= 1;
- }
-
- return r;
-}
-
-// reverse(x) -> reverse(x+1)
-size_t rev_next(size_t x, size_t n)
-{
- do
- {
- n >>= 1;
- x ^= n;
- }
- while ((x & n) == 0);
-
- return x;
-}
-
-// swaps values with indices that have reversed bit representation (data[1100b] <-> data[0011b])
-template <typename T> void fft_copy_reorder(unsigned int* dest, const T* source, size_t size)
-{
- if (size <= 2)
- {
- std::copy(source, source + size, dest);
- return;
- }
-
- size_t r = 0;
-
- dest[0] = source[0];
-
- for (size_t x = 1; x < size; ++x)
- {
- r = rev_next(r, size);
-
- dest[x] = source[r];
- }
-}
-
-void fft_recursive(unsigned int* data, size_t size, unsigned int prime, const unsigned int* roots)
-{
- size /= 2;
-
- if (size > 1)
- {
- fft_recursive(data, size, prime, roots - 1);
- fft_recursive(data + size, size, prime, roots - 1);
- }
-
- unsigned int root = *roots;
- unsigned int r = 1;
-
- for (size_t i = 0; i < size; ++i)
- {
- unsigned int a = data[i];
- unsigned int b = modmul(data[i + size], r, prime);
-
- data[i] = modadd(a, b, prime);
- data[i + size] = modsub(a, b, prime);
-
- r = modmul(r, root, prime);
- }
-}
-
-void fft_iterative(unsigned int* data, size_t size, unsigned int prime, const unsigned int* roots)
-{
- size_t step = 1;
-
- unsigned int nstep = 0;
-
- while (step < size)
- {
- unsigned int root = roots[++nstep];
-
- size_t half_step = step;
- step *= 2;
-
- unsigned int r = 1;
-
- for (size_t j = 0; j < half_step; ++j)
- {
- for (size_t i = j; i < size; i += step)
- {
- unsigned int a = data[i];
- unsigned int b = modmul(data[i + half_step], r, prime);
-
- data[i] = modadd(a, b, prime);
- data[i + half_step] = modsub(a, b, prime);
- }
-
- r = modmul(r, root, prime);
- }
- }
-}
-
-void dft(unsigned int* dest, const unsigned short* source, size_t N, unsigned int log2N, unsigned int prime, const unsigned int* root_table)
-{
- fft_copy_reorder(dest, source, N);
- fft_recursive(dest, N, prime, root_table + log2N);
-}
-
-void dft_i(unsigned int* dest, const unsigned short* source, size_t N, unsigned int log2N, unsigned int prime, const unsigned int* root_table)
-{
- fft_copy_reorder(dest, source, N);
- fft_iterative(dest, N, prime, root_table);
-}
-
-void ift(unsigned int* dest, const unsigned int* source, size_t N, unsigned int log2N, unsigned int prime, const unsigned int* root_table, unsigned int inv_N)
-{
- fft_copy_reorder(dest, source, N);
- fft_recursive(dest, N, prime, root_table + log2N);
-
- for (size_t i = 0; i < N; ++i)
- {
- dest[i] = modmul(dest[i], inv_N, prime);
- }
-}
-
-void ift_i(unsigned int* dest, const unsigned int* source, size_t N, unsigned int log2N, unsigned int prime, const unsigned int* root_table, unsigned int inv_N)
-{
- fft_copy_reorder(dest, source, N);
- fft_iterative(dest, N, prime, root_table);
-
- for (size_t i = 0; i < N; ++i)
- {
- dest[i] = modmul(dest[i], inv_N, prime);
- }
-}
-
-void fft_crt_carry(unsigned short* dest, const unsigned int* conv0, const unsigned int* conv1, size_t N)
-{
- unsigned __int64 prime = static_cast<unsigned __int64>(fft_primes[0]) * fft_primes[1];
- unsigned int inv_p0_mod_p1 = fft_inv_p0_mod_p1;
-
- unsigned __int64 carry = 0;
-
- for (int i = 0; i < N; ++i)
- {
- // 64 x == 32 c0 mod 32 p0
- // 64 x == 32 c1 mod 32 p1
-
- // x == p0*t + c0
- // p0*t + c0 == c1 mod p1
- // t == (c1 - c0) div p0 mod p1
-
- // t == t0 mod p1
-
- // x == p0 * t0 + c0
- // t0 == (c1 - c0) div p0 mod p1
-
- unsigned int t0 = modmul(modsub(conv1[i], conv0[i], fft_primes[1]), inv_p0_mod_p1, fft_primes[1]);
- unsigned __int64 x = static_cast<unsigned __int64>(fft_primes[0]) * t0 + conv0[i];
-
- carry += x;
-
- dest[i] = carry & 0xffff;
- carry >>= 16;
- }
-}
-
-// precondition: dest has N elements allocated, a and b have N elements, last N/2 are 0, N is power of two
-// postcondition: dest has product, padded with zeroes
-void mul_fft(unsigned short* dest, const unsigned short* a, const unsigned short* b, size_t N,
- void (*dft)(unsigned int* dest, const unsigned short* source, size_t N, unsigned int log2N, unsigned int prime, const unsigned int* root_table),
- void (*ift)(unsigned int* dest, const unsigned int* source, size_t N, unsigned int log2N, unsigned int prime, const unsigned int* root_table, unsigned int inv_N)
- )
-{
- unsigned int log2N = log2(N);
-
- unsigned int* workspace = new unsigned int[4*N];
-
- unsigned int* convs[] = {workspace, workspace + N};
- unsigned int* fft_a = workspace + 2*N;
- unsigned int* fft_b = workspace + 3*N;
-
- for (int p = 0; p < 2; ++p)
- {
- const unsigned int* root_table = fft_primitive_roots[p];
- unsigned int prime = fft_primes[p];
-
- dft(fft_a, a, N, log2N, prime, root_table);
- dft(fft_b, b, N, log2N, prime, root_table);
-
- for (size_t i = 0; i < N; ++i)
- {
- fft_a[i] = modmul(fft_a[i], fft_b[i], prime);
- }
-
- const unsigned int* inv_root_table = fft_inv_primitive_roots[p];
- unsigned int inv_N = fft_inv_N[p][log2N];
-
- ift(convs[p], fft_a, N, log2N, prime, inv_root_table, inv_N);
- }
-
- fft_crt_carry(dest, convs[0], convs[1], N);
-
- delete[] workspace;
-}
-
-// precondition: dest has N elements allocated, a has N elements, last N/2 are 0, N is power of two
-// postcondition: dest has product, padded with zeroes
-void sqr_fft(unsigned short* dest, const unsigned short* a, size_t N,
- void (*dft)(unsigned int* dest, const unsigned short* source, size_t N, unsigned int log2N, unsigned int prime, const unsigned int* root_table),
- void (*ift)(unsigned int* dest, const unsigned int* source, size_t N, unsigned int log2N, unsigned int prime, const unsigned int* root_table, unsigned int inv_N)
- )
-{
- unsigned int log2N = log2(N);
-
- unsigned int* workspace = new unsigned int[3*N];
-
- unsigned int* convs[] = {workspace, workspace + N};
- unsigned int* fft = workspace + 2*N;
-
- for (int p = 0; p < 2; ++p)
- {
- const unsigned int* root_table = fft_primitive_roots[p];
- unsigned int prime = fft_primes[p];
-
- dft(fft, a, N, log2N, prime, root_table);
-
- for (size_t i = 0; i < N; ++i)
- {
- fft[i] = modmul(fft[i], fft[i], prime);
- }
-
- const unsigned int* inv_root_table = fft_inv_primitive_roots[p];
- unsigned int inv_N = fft_inv_N[p][log2N];
-
- ift(convs[p], fft, N, log2N, prime, inv_root_table, inv_N);
- }
-
- fft_crt_carry(dest, convs[0], convs[1], N);
-
- delete[] workspace;
-}
-
-void mul_basecase(unsigned short* dest, const unsigned short* a, const unsigned short* b, size_t N)
-{
- typedef unsigned int limb2_t;
-
- memset(dest, 0, sizeof(unsigned short) * N);
-
- unsigned short* i = dest;
-
- for (const unsigned short* li = a; li != a + N/2; ++li, ++i)
- {
- unsigned short carry = 0;
-
- unsigned short* ci = i;
-
- for (const unsigned short* ri = b; ri != b + N/2; ++ri)
- {
- limb2_t result = static_cast<limb2_t>(*li) * *ri + *ci + carry;
-
- *ci++ = static_cast<unsigned short>(result & 0xffff);
-
- carry = static_cast<unsigned short>(result >> 16);
- }
-
- while (carry != 0)
- {
- limb2_t result = static_cast<limb2_t>(*ci) + carry;
-
- *ci++ = static_cast<unsigned short>(result & 0xffff);
-
- carry = static_cast<unsigned short>(result >> 16);
- }
- }
-}
-
-int main()
-{
- size_t bits = (1 << 20) * 16;
- size_t N = bits / 16;
-
- unsigned short* a = new unsigned short[N];
- unsigned short* b = new unsigned short[N];
- unsigned short* c = new unsigned short[N];
-
- for (size_t i = 0; i < N/2; ++i)
- {
- a[i] = b[i] = 65535;
- }
-
- for (size_t i = N/2; i < N; ++i)
- {
- a[i] = b[i] = 0;
- }
-
- int count = 1;
- LARGE_INTEGER start, end, freq;
-
- QueryPerformanceFrequency(&freq);
-
- QueryPerformanceCounter(&start);
- for (int i = 0; i < count; ++i)
- mul_fft(c, a, b, N, dft, ift);
- QueryPerformanceCounter(&end);
-
- std::cout << "Recursive: " << (float)(end.QuadPart - start.QuadPart) / freq.QuadPart << std::endl;
- S("res_fft_rec.txt", c, N);
-
- QueryPerformanceCounter(&start);
- for (int i = 0; i < count; ++i)
- mul_fft(c, a, b, N, dft_i, ift_i);
- QueryPerformanceCounter(&end);
-
- std::cout << "Iterative: " << (float)(end.QuadPart - start.QuadPart) / freq.QuadPart << std::endl;
- S("res_fft_iter.txt", c, N);
-
- QueryPerformanceCounter(&start);
- for (int i = 0; i < count; ++i)
- sqr_fft(c, a, N, dft, ift);
- QueryPerformanceCounter(&end);
-
- std::cout << "Recursive SQR: " << (float)(end.QuadPart - start.QuadPart) / freq.QuadPart << std::endl;
- S("res_fft_rec_sqr.txt", c, N);
-
- QueryPerformanceCounter(&start);
- for (int i = 0; i < count; ++i)
- sqr_fft(c, a, N, dft_i, ift_i);
- QueryPerformanceCounter(&end);
-
- std::cout << "Iterative SQR: " << (float)(end.QuadPart - start.QuadPart) / freq.QuadPart << std::endl;
- S("res_fft_iter_sqr.txt", c, N);
-
- QueryPerformanceCounter(&start);
- for (int i = 0; i < count; ++i)
- mul_basecase(c, a, b, N);
- QueryPerformanceCounter(&end);
-
- std::cout << "Basecase: " << (float)(end.QuadPart - start.QuadPart) / freq.QuadPart << std::endl;
- S("res_basecase.txt", c, N);
-
- delete[] a;
- delete[] b;
- delete[] c;
-}

Deleted: sandbox/SOC/2007/bigint/fft/primes.cpp
==============================================================================
--- sandbox/SOC/2007/bigint/fft/primes.cpp 2007-08-05 15:19:35 EDT (Sun, 05 Aug 2007)
+++ (empty file)
@@ -1,38 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <algorithm>
-#include <vector>
-
-int main()
-{
- const unsigned int size = 4294967294;
-
- std::vector<unsigned int> primes;
-
- for (unsigned int i = 2; i < size; ++i)
- {
- bool prime = true;
-
- for (size_t j = 0; j < primes.size(); ++j)
- {
- if ((unsigned __int64)primes[j] * primes[j] > i) break;
-
- if (i % primes[j] == 0)
- {
- prime = false;
- break;
- }
- }
-
- if (prime) primes.push_back(i);
- }
-
- std::cout << primes.size() << std::endl;
-
- std::ofstream out("primes.txt");
-
- for (size_t j = 0; j < primes.size(); ++j)
- {
- out << primes[j] << std::endl;
- }
-}

Deleted: sandbox/SOC/2007/bigint/fft/primes.h
==============================================================================
--- sandbox/SOC/2007/bigint/fft/primes.h 2007-08-05 15:19:35 EDT (Sun, 05 Aug 2007)
+++ (empty file)
@@ -1,27 +0,0 @@
-static const unsigned int fft_primes[] = {70254593, 81788929};
-
-// 70254593 = 67 * 2^20 + 1
-// 81788929 = 39 * 2^21 + 1
-// fft_max_N * base^2 should be less than 70254593 * 81788929 = 5746047918800897 (base = 65536)
-// fft_max_N should be greater or equal than 2^20 = 1048576 due to primes structure
-const unsigned int fft_max_N = 1048576;
-
-static const unsigned int fft_primitive_roots[2][21] =
-{
- {1, 70254592, 50348549, 19383190, 12538576, 54567172, 35010243, 946420, 69401582, 12802663, 39325177, 61326249, 20368503, 32379324, 58783494, 58098554, 49169846, 5764801, 2401, 49, 7},
- {1, 81788928, 23980934, 1883838, 32049591, 79935288, 22685020, 73127609, 43778833, 17637033, 63153960, 14279076, 36624597, 52261024, 54282709, 36693892, 78305599, 42953381, 41608963, 418609, 647}
-};
-
-static const unsigned int fft_inv_primitive_roots[2][21] =
-{
- {1, 70254592, 19906044, 52459594, 3183438, 53518501, 39073710, 19737011, 63685585, 64302287, 30531912, 69788710, 18847954, 64069532, 36127269, 61117601, 3300726, 63813627, 61505687, 63085757, 20072741},
- {1, 81788928, 57807995, 1977387, 47596280, 37757181, 33726541, 75044061, 38800727, 16012482, 79766222, 64674754, 434079, 26744811, 67309642, 61891499, 67784436, 73579077, 56837587, 80643596, 76858839}
-};
-
-static const unsigned int fft_inv_N[2][21] =
-{
- {1, 35127297, 52690945, 61472769, 65863681, 68059137, 69156865, 69705729, 69980161, 70117377, 70185985, 70220289, 70237441, 70246017, 70250305, 70252449, 70253521, 70254057, 70254325, 70254459, 70254526},
- {1, 40894465, 61341697, 71565313, 76677121, 79233025, 80510977, 81149953, 81469441, 81629185, 81709057, 81748993, 81768961, 81778945, 81783937, 81786433, 81787681, 81788305, 81788617, 81788773, 81788851}
-};
-
-const unsigned int fft_inv_p0_mod_p1 = 37176793;

Deleted: sandbox/SOC/2007/bigint/fft/primes_analyze.cpp
==============================================================================
--- sandbox/SOC/2007/bigint/fft/primes_analyze.cpp 2007-08-05 15:19:35 EDT (Sun, 05 Aug 2007)
+++ (empty file)
@@ -1,46 +0,0 @@
-#include <stdio.h>
-#include <vector>
-#include <iostream>
-
-int main()
-{
- FILE* f = fopen("primes.txt", "r");
-
- unsigned int p = 0;
-
- std::vector<unsigned int> powers[12];
-
- while (fscanf(f, "%u", &p) >= 0)
- {
- p -= 1;
-
- unsigned int power = 0;
-
- while ((p & 1) == 0)
- {
- ++power;
- p /= 2;
- }
-
- if (power >= 20)
- {
- powers[power - 20].push_back(p);
- }
- }
-
- fclose(f);
-
- std::cout << "Yo!" << std::endl;
-
- for (unsigned int i = 0; i < 12; ++i)
- {
- std::cout << i << ":";
-
- for (size_t j = 0; j < powers[i].size(); ++j)
- {
- std::cout << " " << powers[i][j];
- }
-
- std::cout << std::endl;
- }
-}

Deleted: sandbox/SOC/2007/bigint/fft/roots.cpp
==============================================================================
--- sandbox/SOC/2007/bigint/fft/roots.cpp 2007-08-05 15:19:35 EDT (Sun, 05 Aug 2007)
+++ (empty file)
@@ -1,198 +0,0 @@
-#include <iostream>
-#include <algorithm>
-#include <vector>
-#include <stdio.h>
-
-unsigned int modmul(unsigned int a, unsigned int b, unsigned int prime)
-{
- return static_cast<unsigned int>(static_cast<unsigned __int64>(a) * b % prime);
-}
-
-unsigned int pow(unsigned int base, unsigned int power, unsigned int prime)
-{
- unsigned int pot = 1;
-
- // Find largest power of two that is >= rhs
- while (pot < power && (pot << 1) != 0)
- pot <<= 1;
-
- unsigned int res = 1;
-
- while (pot > 0)
- {
- res = modmul(res, res, prime);
-
- if ((power & pot) != 0)
- {
- res = modmul(res, base, prime);
- }
-
- pot >>= 1;
- }
-
- return res;
-}
-
-int main(int argc, char** argv)
-{
- const unsigned int base = 65536;
-
- if (argc < 3)
- {
- std::cout << "#error You should pass 2 prime numbers here" << std::endl;
- return 0;
- }
-
- unsigned int primes[2] = {_atoi64(argv[1]), _atoi64(argv[2])};
-
- if (primes[0] > 2147483647 || primes[1] > 2147483647)
- {
- std::cout << "#error FFT code relies on all numbers to fit in 31 bits, your primes are too big" << std::endl;
- return 0;
- }
-
- if (primes[0] > primes[1]) std::swap(primes[0], primes[1]);
-
- std::cout << "static const unsigned int fft_primes[] = {" << primes[0] << ", " << primes[1] << "};\n\n";
-
- unsigned int powers[2];
-
- for (int i = 0; i < 2; ++i)
- {
- // find power
- unsigned int t = primes[i] - 1;
-
- unsigned int power = 0;
-
- while (t % 2 == 0)
- {
- ++power;
- t /= 2;
- }
-
- std::cout << "// " << primes[i] << " = " << t << " * 2^" << power << " + 1\n";
-
- powers[i] = power;
- }
-
- // convolution's element is N * base^2 at max, where N is size of each vector (provided we do N * N -> 2N multiplication)
- unsigned __int64 modulus = static_cast<unsigned __int64>(primes[0]) * primes[1];
-
- modulus /= base;
- modulus /= base;
-
- // now modulus is upper bound for N; round it down to 2^k
-
- unsigned int maxN = 1;
- while (maxN * 2 <= modulus) maxN *= 2;
-
- std::cout << "// fft_max_N * base^2 should be less than " << primes[0] << " * " << primes[1] << " = " << static_cast<unsigned __int64>(primes[0]) * primes[1];
- std::cout << " (base = " << base << ")\n";
-
- unsigned int power_base = std::min(powers[0], powers[1]);
- unsigned int power = 1 << power_base;
-
- std::cout << "// fft_max_N should be greater or equal than 2^" << power_base << " = " << power << " due to primes structure" << std::endl;
-
- if (power > maxN)
- {
- while (power > maxN)
- {
- --power_base;
- power /= 2;
- }
- }
- else
- {
- maxN = power;
- }
-
- std::cout << "const unsigned int fft_max_N = " << maxN << ";\n\n";
-
- unsigned int roots[2];
-
- // find primitive roots (for power)
-
- for (int i = 0; i < 2; ++i)
- {
- unsigned int prime = primes[i];
-
- for (unsigned int j = 1; j < prime; ++j)
- {
- // root?
- if (pow(j, power, prime) == 1)
- {
- bool primitive = false;
-
- __int64 r = 1;
-
- for (unsigned int k = 0; k < power; ++k)
- {
- r *= j;
- r %= prime;
-
- if (r == 1)
- {
- primitive = (k == power - 1);
- break;
- }
- }
-
- if (primitive)
- {
- roots[i] = j;
- break;
- }
- }
- }
- }
-
- // now generate full suite
- std::vector<unsigned int> all_roots[2];
-
- for (int i = 0; i < 2; ++i)
- all_roots[i].resize(power_base + 1);
-
- for (int inv = 0; inv < 2; ++inv)
- {
- for (int i = 0; i < 2; ++i)
- {
- all_roots[i][power_base] = inv ? pow(roots[i], primes[i] - 2, primes[i]) : roots[i];
-
- for (int j = power_base; j > 0; --j)
- {
- all_roots[i][j-1] = modmul(all_roots[i][j], all_roots[i][j], primes[i]);
- }
- }
-
- std::cout << "static const unsigned int fft_" << (inv ? "inv_" : "") << "primitive_roots[2][" << power_base + 1 << "] =\n{\n";
-
- for (int i = 0; i < 2; ++i)
- {
- std::cout << "\t{";
-
- for (int j = 0; j <= power_base; ++j)
- std::cout << all_roots[i][j] << (j == power_base ? "" : ", ");
-
- std::cout << "}" << ("," + (i == 1)) << "\n";
- }
-
- std::cout << "};\n\n";
- }
-
- std::cout << "static const unsigned int fft_inv_N[2][" << power_base + 1 << "] =\n{\n";
- for (int i = 0; i < 2; ++i)
- {
- std::cout << "\t{";
-
- for (int j = 0; j <= power_base; ++j)
- std::cout << pow(1 << (unsigned int)j, primes[i] - 2, primes[i]) << (j == power_base ? "" : ", ");
-
- std::cout << "}" << ("," + (i == 1)) << "\n";
- }
- std::cout << "};\n\n";
-
- unsigned int inv_p0_mod_p1 = pow(primes[0], primes[1] - 2, primes[1]);
-
- std::cout << "const unsigned int fft_inv_p0_mod_p1 = " << inv_p0_mod_p1 << ";\n";
-}

Added: sandbox/SOC/2007/bigint/libs/bigint/src/fft_primes.hpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2007/bigint/libs/bigint/src/fft_primes.hpp 2007-08-05 15:19:35 EDT (Sun, 05 Aug 2007)
@@ -0,0 +1,27 @@
+static const uint32_t fft_primes[] = {1811939329, 2013265921};
+
+// 1811939329 = 27 * 2^26 + 1
+// 2013265921 = 15 * 2^27 + 1
+// fft_max_N * base^2 should be less than 1811939329 * 2013265921 = 3647915701995307009 (base = 65536)
+// fft_max_N should be greater or equal than 2^26 = 67108864 due to primes structure
+const size_t fft_max_N = 67108864;
+
+static const uint32_t fft_primitive_roots[2][27] =
+{
+ {1, 1811939328, 394989905, 55917252, 1118888139, 1272218801, 806334172, 436377801, 1023662952, 966955881, 581065507, 1672458129, 1098404717, 963626781, 1186399201, 1597927735, 392626835, 468971248, 1794545288, 1542029237, 1150935898, 1350666604, 1610410665, 1160146422, 342102016, 18496, 136},
+ {1, 2013265920, 284861408, 1801542727, 1446056615, 1399190761, 939236864, 521584899, 46459047, 1260838569, 1313302657, 638351792, 368514500, 1509407754, 1048643618, 1815937125, 451926729, 45920005, 1463305339, 775491602, 1343975138, 728494330, 1000080393, 1478531143, 7311616, 2704, 52}
+};
+
+static const uint32_t fft_inv_primitive_roots[2][27] =
+{
+ {1, 1811939328, 1416949424, 359621496, 1134583214, 281735358, 162591522, 1278842376, 499483227, 1150251226, 1286694572, 1182052503, 671235027, 385336276, 210810912, 1762190734, 88500015, 925755953, 772591754, 451751135, 777519015, 1770749423, 1487423663, 1759406041, 57989169, 1711526385, 839354248},
+ {1, 2013265920, 1728404513, 1592366214, 1816869661, 1123955347, 1326890868, 1192577619, 1311163986, 1959689139, 448785913, 547291506, 903929129, 1482477510, 1133599127, 945906203, 1921914948, 1722814814, 388124777, 1911029444, 1894324124, 1797002609, 452573369, 1816169837, 838408214, 118383610, 116149957}
+};
+
+static const uint32_t fft_inv_N[2][27] =
+{
+ {1, 905969665, 1358954497, 1585446913, 1698693121, 1755316225, 1783627777, 1797783553, 1804861441, 1808400385, 1810169857, 1811054593, 1811496961, 1811718145, 1811828737, 1811884033, 1811911681, 1811925505, 1811932417, 1811935873, 1811937601, 1811938465, 1811938897, 1811939113, 1811939221, 1811939275, 1811939302},
+ {1, 1006632961, 1509949441, 1761607681, 1887436801, 1950351361, 1981808641, 1997537281, 2005401601, 2009333761, 2011299841, 2012282881, 2012774401, 2013020161, 2013143041, 2013204481, 2013235201, 2013250561, 2013258241, 2013262081, 2013264001, 2013264961, 2013265441, 2013265681, 2013265801, 2013265861, 2013265891}
+};
+
+const uint32_t fft_inv_p0_mod_p1 = 10;

Added: sandbox/SOC/2007/bigint/libs/bigint/src/modmul_test.cpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2007/bigint/libs/bigint/src/modmul_test.cpp 2007-08-05 15:19:35 EDT (Sun, 05 Aug 2007)
@@ -0,0 +1,53 @@
+/* Boost modmul_test.cpp header file
+ *
+ * Copyright 2007 Arseny Kapoulkine
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or
+ * copy at http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include <iostream>
+
+#include "fft_primes.hpp"
+
+unsigned int modmul_native(unsigned int a, unsigned int b, unsigned int prime)
+{
+ return static_cast<unsigned int>(static_cast<unsigned __int64>(a) * b % prime);
+}
+
+unsigned int modmul_fast(unsigned int a, unsigned int b, unsigned int prime, double inv_prime)
+{
+ int r = a * b - prime * static_cast<unsigned int>(inv_prime * static_cast<int>(a) * b);
+ r = (r < 0 ? r + prime : r);
+ return r;
+}
+
+int main()
+{
+ const unsigned int step_a = 100;
+ const unsigned int step_b = 100;
+
+ for (int p = 0; p < 2; ++p)
+ {
+ unsigned int prime = fft_primes[p];
+ double inv_prime = 1.0 / prime;
+
+ std::cout << "Testing prime " << prime << std::endl;
+
+ for (unsigned int a = 0; a < prime; a += step_a)
+ {
+ std::cout << ".";
+
+ for (unsigned int b = 0; b < prime; b += step_b)
+ {
+ if (modmul_native(a, b, prime) != modmul_fast(a, b, prime, inv_prime))
+ {
+ std::cout << std::endl;
+ std::cout << "Test failed! a = " << a << ", b = " << b << ", prime = " << prime << std::endl;
+ exit(0);
+ }
+ }
+ }
+ }
+}

Added: sandbox/SOC/2007/bigint/libs/bigint/src/roots.cpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2007/bigint/libs/bigint/src/roots.cpp 2007-08-05 15:19:35 EDT (Sun, 05 Aug 2007)
@@ -0,0 +1,207 @@
+/* Boost roots.cpp header file
+ *
+ * Copyright 2007 Arseny Kapoulkine
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or
+ * copy at http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include <iostream>
+#include <algorithm>
+#include <vector>
+#include <stdio.h>
+
+unsigned int modmul(unsigned int a, unsigned int b, unsigned int prime)
+{
+ return static_cast<unsigned int>(static_cast<unsigned __int64>(a) * b % prime);
+}
+
+unsigned int pow(unsigned int base, unsigned int power, unsigned int prime)
+{
+ unsigned int pot = 1;
+
+ // Find largest power of two that is >= rhs
+ while (pot < power && (pot << 1) != 0)
+ pot <<= 1;
+
+ unsigned int res = 1;
+
+ while (pot > 0)
+ {
+ res = modmul(res, res, prime);
+
+ if ((power & pot) != 0)
+ {
+ res = modmul(res, base, prime);
+ }
+
+ pot >>= 1;
+ }
+
+ return res;
+}
+
+int main(int argc, char** argv)
+{
+ const unsigned int base = 65536;
+
+ if (argc < 3)
+ {
+ std::cout << "#error You should pass 2 prime numbers here" << std::endl;
+ return 0;
+ }
+
+ unsigned int primes[2] = {static_cast<unsigned int>(_atoi64(argv[1])), static_cast<unsigned int>(_atoi64(argv[2]))};
+
+ if (primes[0] > 2147483647 || primes[1] > 2147483647)
+ {
+ std::cout << "#error FFT code relies on all numbers to fit in 31 bits, your primes are too big" << std::endl;
+ return 0;
+ }
+
+ if (primes[0] > primes[1]) std::swap(primes[0], primes[1]);
+
+ std::cout << "static const uint32_t fft_primes[] = {" << primes[0] << ", " << primes[1] << "};\n\n";
+
+ unsigned int powers[2];
+
+ for (int i = 0; i < 2; ++i)
+ {
+ // find power
+ unsigned int t = primes[i] - 1;
+
+ unsigned int power = 0;
+
+ while (t % 2 == 0)
+ {
+ ++power;
+ t /= 2;
+ }
+
+ std::cout << "// " << primes[i] << " = " << t << " * 2^" << power << " + 1\n";
+
+ powers[i] = power;
+ }
+
+ // convolution's element is N * base^2 at max, where N is size of each vector (provided we do N * N -> 2N multiplication)
+ unsigned __int64 modulus = static_cast<unsigned __int64>(primes[0]) * primes[1];
+
+ modulus /= base;
+ modulus /= base;
+
+ // now modulus is upper bound for N; round it down to 2^k
+
+ unsigned int maxN = 1;
+ while (maxN * 2 <= modulus) maxN *= 2;
+
+ std::cout << "// fft_max_N * base^2 should be less than " << primes[0] << " * " << primes[1] << " = " << static_cast<unsigned __int64>(primes[0]) * primes[1];
+ std::cout << " (base = " << base << ")\n";
+
+ unsigned int power_base = std::min(powers[0], powers[1]);
+ unsigned int power = 1 << power_base;
+
+ std::cout << "// fft_max_N should be greater or equal than 2^" << power_base << " = " << power << " due to primes structure" << std::endl;
+
+ if (power > maxN)
+ {
+ while (power > maxN)
+ {
+ --power_base;
+ power /= 2;
+ }
+ }
+ else
+ {
+ maxN = power;
+ }
+
+ std::cout << "const size_t fft_max_N = " << maxN << ";\n\n";
+
+ unsigned int roots[2];
+
+ // find primitive roots (for power)
+
+ for (int i = 0; i < 2; ++i)
+ {
+ unsigned int prime = primes[i];
+
+ for (unsigned int j = 1; j < prime; ++j)
+ {
+ // root?
+ if (pow(j, power, prime) == 1)
+ {
+ bool primitive = false;
+
+ __int64 r = 1;
+
+ for (unsigned int k = 0; k < power; ++k)
+ {
+ r *= j;
+ r %= prime;
+
+ if (r == 1)
+ {
+ primitive = (k == power - 1);
+ break;
+ }
+ }
+
+ if (primitive)
+ {
+ roots[i] = j;
+ break;
+ }
+ }
+ }
+ }
+
+ // now generate full suite
+ std::vector<unsigned int> all_roots[2];
+
+ for (int i = 0; i < 2; ++i)
+ all_roots[i].resize(power_base + 1);
+
+ for (int inv = 0; inv < 2; ++inv)
+ {
+ for (int i = 0; i < 2; ++i)
+ {
+ all_roots[i][power_base] = inv ? pow(roots[i], primes[i] - 2, primes[i]) : roots[i];
+
+ for (int j = power_base; j > 0; --j)
+ {
+ all_roots[i][j-1] = modmul(all_roots[i][j], all_roots[i][j], primes[i]);
+ }
+ }
+
+ std::cout << "static const uint32_t fft_" << (inv ? "inv_" : "") << "primitive_roots[2][" << power_base + 1 << "] =\n{\n";
+
+ for (int i = 0; i < 2; ++i)
+ {
+ std::cout << "\t{";
+
+ for (unsigned int j = 0; j <= power_base; ++j)
+ std::cout << all_roots[i][j] << (j == power_base ? "" : ", ");
+
+ std::cout << "}" << ("," + (i == 1)) << "\n";
+ }
+
+ std::cout << "};\n\n";
+ }
+
+ std::cout << "static const uint32_t fft_inv_N[2][" << power_base + 1 << "] =\n{\n";
+ for (int i = 0; i < 2; ++i)
+ {
+ std::cout << "\t{";
+
+ for (unsigned int j = 0; j <= power_base; ++j)
+ std::cout << pow(1 << j, primes[i] - 2, primes[i]) << (j == power_base ? "" : ", ");
+
+ std::cout << "}" << ("," + (i == 1)) << "\n";
+ }
+ std::cout << "};\n\n";
+
+ unsigned int inv_p0_mod_p1 = pow(primes[0], primes[1] - 2, primes[1]);
+
+ std::cout << "const uint32_t fft_inv_p0_mod_p1 = " << inv_p0_mod_p1 << ";\n";
+}


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk