|
Boost-Commit : |
From: arseny.kapoulkine_at_[hidden]
Date: 2007-08-01 15:04:33
Author: zeux
Date: 2007-08-01 15:04:31 EDT (Wed, 01 Aug 2007)
New Revision: 38346
URL: http://svn.boost.org/trac/boost/changeset/38346
Log:
FFT multiplication prototype
Added:
sandbox/SOC/2007/bigint/fft/
sandbox/SOC/2007/bigint/fft/analyze.txt (contents, props changed)
sandbox/SOC/2007/bigint/fft/main.cpp (contents, props changed)
sandbox/SOC/2007/bigint/fft/main_slow.cpp (contents, props changed)
sandbox/SOC/2007/bigint/fft/main_word.cpp (contents, props changed)
sandbox/SOC/2007/bigint/fft/main_word_fast.cpp (contents, props changed)
sandbox/SOC/2007/bigint/fft/primes.cpp (contents, props changed)
sandbox/SOC/2007/bigint/fft/primes_analyze.cpp (contents, props changed)
sandbox/SOC/2007/bigint/fft/roots.cpp (contents, props changed)
sandbox/SOC/2007/bigint/fft/sieve.cpp (contents, props changed)
Added: sandbox/SOC/2007/bigint/fft/analyze.txt
==============================================================================
--- (empty file)
+++ sandbox/SOC/2007/bigint/fft/analyze.txt 2007-08-01 15:04:31 EDT (Wed, 01 Aug 2007)
@@ -0,0 +1,13 @@
+Yo!
+0: 7 13 25 27 67 97 115 135 141 151 177 195 211 235 277 291 297 315 331 343 345 381 391 427 447 471 507 555 577 625 685 711 735 763 765 781 795 811 823 843 865 871 877 883 895 897 913 927 931 961 997 1003 1005 1035 1045 1051 1075 1081 1093 1105 1131 1137 1155 1203 1227 1255 1303 1317 1333 1345 1347 1365 1371 1411 1417 1423 1435 1443 1465 1483 1521 1527 1533 1555 1563 1567 1575 1617 1621 1645 1777 1801 1807 1833 1855 1863 1873 1887 1941 1953 1975 1981 2017 2055 2071 2083 2097 2101 2115 2121 2163 2167 2193 2197 2215 2241 2253 2277 2307 2335 2361 2395 2403 2473 2487 2523 2527 2533 2593 2601 2607 2613 2691 2697 2713 2743 2773 2775 2781 2817 2841 2863 2895 2937 2941 2943 2965 2967 2971 2977 2995 3045 3063 3067 3075 3105 3133 3151 3153 3165 3171 3177 3181 3205 3271 3283 3291 3315 3363 3385 3417 3445 3447 3501 3523 3535 3553 3565 3583 3591 3597 3627 3667 3685 3693 3735 3777 3787 3817 3885 3907 3955 3961 3997 4005 4015 4035 4045 4063 4095
+1: 11 33 39 53 63 65 81 89 95 101 119 123 129 131 183 185 219 221 275 285 303 305 309 339 341 353 375 381 393 429 441 453 459 465 479 483 521 543 549 575 579 581 611 623 633 651 675 735 743 749 789 815 875 891 903 921 953 999 1001 1041 1043 1053 1055 1059 1065 1073 1095 1103 1169 1179 1275 1319 1361 1389 1395 1415 1421 1431 1433 1485 1509 1515 1533 1551 1563 1593 1601 1625 1629 1653 1659 1661 1673 1715 1733 1775 1779 1821 1859 1871 1961 1995 2009 2013 2039
+2: 25 27 33 37 39 55 99 159 163 219 223 225 235 277 289 315 343 375 427 445 483 513 517 553 559 565 573 589 627 637 639 645 649 655 663 669 715 733 757 765 819 823 837 847 853 859 865 879 915 939 957 979
+3: 45 71 77 105 107 119 155 177 249 305 335 347 357 371 395 407 431 437 447 507
+4: 45 73 127 151 157 171 193 235 243
+5: 5 33 51 63 81 125
+6: 7 27 37 43
+7: 15 17 29
+8: 13
+9:
+10: 3
+11:
Added: sandbox/SOC/2007/bigint/fft/main.cpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2007/bigint/fft/main.cpp 2007-08-01 15:04:31 EDT (Wed, 01 Aug 2007)
@@ -0,0 +1,392 @@
+#include <iostream>
+
+#include <windows.h>
+
+const int base = 10;
+typedef short limb_t;
+
+struct print
+{
+ const limb_t* _data;
+ size_t _size;
+
+ print(const limb_t* data, size_t size): _data(data), _size(size)
+ {
+ }
+
+ std::ostream& operator()(std::ostream& os) const
+ {
+ for (const limb_t* i = _data; i != _data + _size; ++i)
+ os << *i;
+
+ return os;
+ }
+};
+
+template <typename T> void P(const char* text, const T* data, size_t size)
+{
+ std::cout << text << ": ";
+ for (size_t i = 0; i < size; ++i) std::cout << data[i] << " ";
+ std::cout << std::endl;
+}
+
+int pow(int base, int power, int prime)
+{
+ int pot = power;
+
+ // Find largest power of two that is >= rhs
+ while (pot < power && (pot << 1) != 0)
+ pot <<= 1;
+
+ int res = 1;
+
+ while (pot > 0)
+ {
+ res = (res * res) % prime;
+
+ if ((power & pot) != 0)
+ {
+ res = (res * base) % prime;
+ }
+
+ pot >>= 1;
+ }
+
+ return res;
+}
+
+// reverse(x) -> reverse(x+1)
+size_t rev_next(size_t x, size_t n)
+{
+ do
+ {
+ n >>= 1;
+ x ^= n;
+ }
+ while ((x & n) == 0);
+
+ return x;
+}
+
+// swaps values with indices that have reversed bit representation (data[1100b] <-> data[0011b])
+void fft_reorder(unsigned int* data, size_t size)
+{
+ if (size <= 2) return;
+
+ size_t r = 0;
+
+ for (size_t x = 1; x < size; ++x)
+ {
+ r = rev_next(r, size);
+
+ if (r > x) std::swap(data[x], data[r]);
+ }
+}
+
+template <typename T> void fft_copy_reorder(unsigned int* dest, const T* source, size_t size)
+{
+ if (size <= 2)
+ {
+ std::copy(source, source + size, dest);
+ return;
+ }
+
+ size_t r = 0;
+
+ dest[0] = source[0];
+
+ for (size_t x = 1; x < size; ++x)
+ {
+ r = rev_next(r, size);
+
+ dest[x] = source[r];
+ }
+}
+
+void fft_recursive(unsigned int* data, size_t size, int prime, int root)
+{
+ size /= 2;
+
+ if (size > 1)
+ {
+ int new_root = (root * root) % prime;
+ fft_recursive(data, size, prime, new_root);
+ fft_recursive(data + size, size, prime, new_root);
+ }
+
+ unsigned int r = 1;
+
+ for (size_t i = 0; i < size; ++i)
+ {
+ unsigned int a = data[i];
+ unsigned int b = (data[i + size] * r) % prime;
+
+ data[i] = (a + b) % prime;
+ data[i + size] = (a + prime - b) % prime;
+
+ r = (r * root) % prime;
+ }
+}
+
+void fft_iterative(unsigned int* data, size_t size, int prime, int root)
+{
+ size_t step = 1;
+
+ int roots[4];
+ roots[3] = root;
+ roots[2] = (roots[3] * roots[3]) % prime;
+ roots[1] = (roots[2] * roots[2]) % prime;
+ roots[0] = (roots[1] * roots[1]) % prime;
+
+ int nstep = 0;
+
+ while (step < size)
+ {
+ root = roots[nstep++];
+
+ size_t half_step = step;
+ step *= 2;
+
+ unsigned int r = 1;
+
+ for (size_t j = 0; j < half_step; ++j)
+ {
+ for (size_t i = j; i < size; i += step)
+ {
+ unsigned int a = data[i];
+ unsigned int b = (data[i + half_step] * r) % prime;
+
+ data[i] = (a + b) % prime;
+ data[i + half_step] = (a + prime - b) % prime;
+ }
+
+ r = (r * root) % prime;
+ }
+ }
+}
+
+void fft(unsigned int* dest, const limb_t* source, size_t N, int prime, int root)
+{
+ fft_copy_reorder(dest, source, N);
+ fft_recursive(dest, N, prime, root);
+}
+
+void fft_i(unsigned int* dest, const limb_t* source, size_t N, int prime, int root)
+{
+ fft_copy_reorder(dest, source, N);
+ fft_iterative(dest, N, prime, root);
+}
+
+void fft_slow(unsigned int* dest, const limb_t* source, size_t N, int prime, int root)
+{
+ unsigned int p_j = 1;
+
+ for (int j = 0; j < N; ++j)
+ {
+ unsigned int r = 0;
+
+ unsigned int p_jk = 1;
+
+ for (int k = 0; k < N; ++k)
+ {
+ r = (r + source[k] * p_jk) % prime;
+ p_jk = (p_jk * p_j) % prime;
+ }
+
+ dest[j] = r;
+
+ p_j = (p_j * root) % prime;
+ }
+}
+
+void ift(unsigned int* dest, const unsigned int* source, size_t N, int prime, int root)
+{
+ int rroot = pow(root, prime - 2, prime);
+
+ fft_copy_reorder(dest, source, N);
+ fft_recursive(dest, N, prime, rroot);
+
+ unsigned int N_inv = pow(N, prime - 2, prime);
+
+ for (size_t i = 0; i < N; ++i)
+ {
+ dest[i] = dest[i] * N_inv % prime;
+ }
+}
+
+void ift_i(unsigned int* dest, const unsigned int* source, size_t N, int prime, int root)
+{
+ int rroot = pow(root, prime - 2, prime);
+
+ fft_copy_reorder(dest, source, N);
+ fft_iterative(dest, N, prime, rroot);
+
+ unsigned int N_inv = pow(N, prime - 2, prime);
+
+ for (size_t i = 0; i < N; ++i)
+ {
+ dest[i] = dest[i] * N_inv % prime;
+ }
+}
+
+void ift_slow(unsigned int* dest, const unsigned int* source, size_t N, int prime, int root)
+{
+ int rroot = pow(root, prime - 2, prime);
+ int N_inv = pow(N, prime - 2, prime);
+
+ unsigned int p_j = 1;
+
+ for (int j = 0; j < N; ++j)
+ {
+ unsigned int p_jk = 1;
+
+ unsigned int r = 0;
+
+ for (int k = 0; k < N; ++k)
+ {
+ r = (r + source[k] * p_jk) % prime;
+
+ p_jk = (p_jk * p_j) % prime;
+ }
+
+ r = (r * N_inv) % prime;
+
+ dest[j] = r;
+
+ p_j = (p_j * rroot) % prime;
+ }
+}
+
+typedef void (*fftfunc)(unsigned int* dest, const limb_t* source, size_t N, int prime, int root);
+typedef void (*iftfunc)(unsigned int* dest, const unsigned int* source, size_t N, int prime, int root);
+
+void mul_fft(limb_t* dest, const limb_t* a, const limb_t* b, fftfunc fft, iftfunc ift)
+{
+ // p must be 1. prime, 2. of the form k*N+1, where N is higherPOT(length(a) + length(b)), and >= N*base^2
+ const size_t N = 16;
+
+ // N * base^2 == 1600
+ // prime0 * prime1 must be > 1600
+ const int primes[] = {17, 97};
+
+ // primitive root (power N)
+ const int roots[] = {3, 8};
+
+ unsigned int convs[2][N];
+
+ for (int p = 0; p < 2; ++p)
+ {
+ unsigned int fft_a[N], fft_b[N], fft_c[N];
+ fft(fft_a, a, N, primes[p], roots[p]);
+ fft(fft_b, b, N, primes[p], roots[p]);
+
+ for (size_t i = 0; i < N; ++i)
+ {
+ fft_c[i] = (fft_a[i] * fft_b[i]) % primes[p];
+ }
+
+ ift(convs[p], fft_c, N, primes[p], roots[p]);
+ }
+
+ // CRT:
+ // x % p1 = r1
+ // x % p2 = r2
+
+ // x % p1p2 = r1 * (P/p1)*T1 + r2 * (P/p2)*T2
+ // T1 = (P/p1) ^ (p1 - 2)
+ // T2 = (P/p2) ^ (p2 - 2)
+
+ int prime = primes[0] * primes[1];
+ int T[2] = {pow(primes[1], primes[0] - 2, primes[0]), pow(primes[0], primes[1] - 2, primes[1])};
+
+ unsigned int carry = 0;
+
+ for (int i = 0; i < N; ++i)
+ {
+ int conv = (convs[0][i] * T[0] * primes[1] + convs[1][i] * T[1] * primes[0]) % prime;
+
+ carry += conv;
+ dest[i] = carry % 10;
+ carry /= 10;
+ }
+}
+
+void mul_basecase(limb_t* dest, const limb_t* a, const limb_t* b)
+{
+ typedef int limb2_t;
+
+ const size_t N = 16;
+
+ memset(dest, 0, sizeof(limb_t) * N);
+
+ limb_t* i = dest;
+
+ for (const limb_t* li = a; li != a + N/2; ++li, ++i)
+ {
+ limb_t carry = 0;
+
+ limb_t* ci = i;
+
+ for (const limb_t* ri = b; ri != b + N/2; ++ri)
+ {
+ limb2_t result = static_cast<limb2_t>(*li) * *ri + *ci + carry;
+
+ *ci++ = static_cast<limb_t>(result % 10);
+
+ carry = static_cast<limb_t>(result / 10);
+ }
+
+ while (carry != 0)
+ {
+ limb2_t result = static_cast<limb2_t>(*ci) + carry;
+
+ *ci++ = static_cast<limb_t>(result % 10);
+
+ carry = static_cast<limb_t>(result / 10);
+ }
+ }
+}
+
+int main()
+{
+ limb_t a[16] = {2, 3, 4, 5, 6, 7, 8, 9};
+ limb_t b[16] = {1, 2, 3, 4, 5, 6, 7, 8};
+
+ limb_t c[16] = {};
+
+ std::cout << "A = "; print(a, 16)(std::cout) << std::endl;
+ std::cout << "B = "; print(b, 16)(std::cout) << std::endl;
+
+ int count = 1;
+ LARGE_INTEGER start, end, freq;
+
+ QueryPerformanceFrequency(&freq);
+
+ QueryPerformanceCounter(&start);
+ for (int i = 0; i < count; ++i)
+ mul_fft(c, a, b, fft, ift);
+ QueryPerformanceCounter(&end);
+
+ std::cout << "Recursive FFT: C = "; print(c, 16)(std::cout) << " " << (float)(end.QuadPart - start.QuadPart) / freq.QuadPart << std::endl;
+
+ QueryPerformanceCounter(&start);
+ for (int i = 0; i < count; ++i)
+ mul_fft(c, a, b, fft_i, ift_i);
+ QueryPerformanceCounter(&end);
+
+ std::cout << "Iterative FFT: C = "; print(c, 16)(std::cout) << " " << (float)(end.QuadPart - start.QuadPart) / freq.QuadPart << std::endl;
+
+ QueryPerformanceCounter(&start);
+ for (int i = 0; i < count; ++i)
+ mul_fft(c, a, b, fft_slow, ift_slow);
+ QueryPerformanceCounter(&end);
+
+ std::cout << "Slow FT: C = "; print(c, 16)(std::cout) << " " << (float)(end.QuadPart - start.QuadPart) / freq.QuadPart << std::endl;
+
+ QueryPerformanceCounter(&start);
+ for (int i = 0; i < count; ++i)
+ mul_basecase(c, a, b);
+ QueryPerformanceCounter(&end);
+
+ std::cout << "Basecase: C = "; print(c, 16)(std::cout) << " " << (float)(end.QuadPart - start.QuadPart) / freq.QuadPart << std::endl;
+}
Added: sandbox/SOC/2007/bigint/fft/main_slow.cpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2007/bigint/fft/main_slow.cpp 2007-08-01 15:04:31 EDT (Wed, 01 Aug 2007)
@@ -0,0 +1,155 @@
+#include <iostream>
+
+const int base = 10;
+typedef short limb_t;
+
+struct print
+{
+ const limb_t* _data;
+ size_t _size;
+
+ print(const limb_t* data, size_t size): _data(data), _size(size)
+ {
+ }
+
+ std::ostream& operator()(std::ostream& os) const
+ {
+ for (const limb_t* i = _data; i != _data + _size; ++i)
+ os << *i;
+
+ return os;
+ }
+};
+
+int pow(int base, int power, int prime)
+{
+ int res = 1;
+
+ for (int i = 0; i < power; ++i)
+ {
+ res = (res * base) % prime;
+ }
+
+ return res;
+}
+
+void fft(unsigned int* dest, const limb_t* source, size_t N, int prime, int root)
+{
+ for (int j = 0; j < N; ++j)
+ {
+ unsigned int r = 0;
+
+ for (int k = 0; k < N; ++k)
+ {
+ r = (r + ((source[k] * pow(root, j*k, prime)) % prime)) % prime;
+ }
+
+ dest[j] = r;
+ }
+}
+
+void ift(unsigned int* dest, const unsigned int* source, size_t N, int prime, int root)
+{
+ int rroot = pow(root, prime - 2, prime);
+
+ for (int j = 0; j < N; ++j)
+ {
+ unsigned int r = 0;
+
+ for (int k = 0; k < N; ++k)
+ {
+ r = (r + ((source[k] * pow(rroot, j*k, prime)) % prime)) % prime;
+ }
+
+ r = (r * pow(N, prime - 2, prime)) % prime;
+
+ dest[j] = r;
+ }
+}
+
+void mul_fft(limb_t* dest, const limb_t* a, const limb_t* b)
+{
+ // p must be 1. prime, 2. of the form k*N+1, where N is higherPOT(length(a) + length(b)), and >= N*base^2
+ const size_t N = 16;
+
+ // N * base^2 == 1600
+ // prime0 * prime1 must be > 1600
+ const int primes[] = {17, 97};
+
+ // primitive root (power N)
+ const int roots[] = {3, 8};
+
+ unsigned int convs[2][N];
+
+ for (int p = 0; p < 2; ++p)
+ {
+ unsigned int fft_a[N], fft_b[N], fft_c[N];
+ fft(fft_a, a, N, primes[p], roots[p]);
+ fft(fft_b, b, N, primes[p], roots[p]);
+
+ for (size_t i = 0; i < N; ++i)
+ {
+ fft_c[i] = (fft_a[i] * fft_b[i]) % primes[p];
+ }
+
+ ift(convs[p], fft_c, N, primes[p], roots[p]);
+ }
+
+ unsigned int conv[N];
+
+ // CRT:
+ // x % p1 = r1
+ // x % p2 = r2
+
+ // x % p1p2 = r1 * (P/p1)*T1 + r2 * (P/p2)*T2
+ // T1 = (P/p1) ^ (p1 - 2)
+ // T2 = (P/p2) ^ (p2 - 2)
+
+ int prime = primes[0] * primes[1];
+ int T[2] = {pow(primes[1], primes[0] - 2, primes[0]), pow(primes[0], primes[1] - 2, primes[1])};
+
+ for (int i = 0; i < N; ++i)
+ {
+ conv[i] = (convs[0][i] * T[0] * primes[1] + convs[1][i] * T[1] * primes[0]) % prime;
+ }
+
+ std::cout << std::endl;
+ for (int i = 0; i < N; ++i)
+ {
+ std::cout << convs[0][i] << " ";
+ }
+ std::cout << std::endl;
+ for (int i = 0; i < N; ++i)
+ {
+ std::cout << convs[1][i] << " ";
+ }
+ std::cout << std::endl;
+ for (int i = 0; i < N; ++i)
+ {
+ std::cout << conv[i] << " ";
+ }
+ std::cout << std::endl;
+
+ unsigned int carry = 0;
+
+ for (int i = 0; i < N; ++i)
+ {
+ carry += conv[i];
+ dest[i] = carry % 10;
+ carry /= 10;
+ }
+}
+
+int main()
+{
+ limb_t a[16] = {2, 3, 4, 5, 6, 7, 8, 9};
+ limb_t b[16] = {1, 2, 3, 4, 5, 6, 7, 8};
+
+ limb_t c[16] = {};
+
+ mul_fft(c, a, b);
+
+ std::cout << "A = "; print(a, 16)(std::cout) << std::endl;
+ std::cout << "B = "; print(b, 16)(std::cout) << std::endl;
+ std::cout << "C = "; print(c, 16)(std::cout) << std::endl;
+}
Added: sandbox/SOC/2007/bigint/fft/main_word.cpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2007/bigint/fft/main_word.cpp 2007-08-01 15:04:31 EDT (Wed, 01 Aug 2007)
@@ -0,0 +1,260 @@
+#include <iostream>
+
+#include <windows.h>
+
+#define __int64 long long
+
+const unsigned int base = 65536;
+typedef unsigned short limb_t;
+
+template <typename T> void P(const char* text, const T* data, size_t size)
+{
+ std::cout << text;
+ for (size_t i = 0; i < size; ++i) std::cout << " " << std::dec << data[i] << std::dec;
+ std::cout << std::endl;
+}
+
+unsigned int modmul(unsigned int a, unsigned int b, unsigned int prime)
+{
+ return static_cast<unsigned int>(static_cast<unsigned __int64>(a) * b % prime);
+}
+
+unsigned int modadd(unsigned int a, unsigned int b, unsigned int prime)
+{
+ return (a + b) % prime;
+}
+
+unsigned int modsub(unsigned int a, unsigned int b, unsigned int prime)
+{
+ return (a + prime - b) % prime;
+}
+
+unsigned int pow(unsigned int base, unsigned int power, unsigned int prime)
+{
+ unsigned int pot = 1;
+
+ // Find largest power of two that is >= rhs
+ while (pot < power && (pot << 1) != 0)
+ pot <<= 1;
+
+ unsigned int res = 1;
+
+ while (pot > 0)
+ {
+ res = modmul(res, res, prime);
+
+ if ((power & pot) != 0)
+ {
+ res = modmul(res, base, prime);
+ }
+
+ pot >>= 1;
+ }
+
+ return res;
+}
+
+// reverse(x) -> reverse(x+1)
+size_t rev_next(size_t x, size_t n)
+{
+ do
+ {
+ n >>= 1;
+ x ^= n;
+ }
+ while ((x & n) == 0);
+
+ return x;
+}
+
+// swaps values with indices that have reversed bit representation (data[1100b] <-> data[0011b])
+template <typename T> void fft_copy_reorder(unsigned int* dest, const T* source, size_t size)
+{
+ if (size <= 2)
+ {
+ std::copy(source, source + size, dest);
+ return;
+ }
+
+ size_t r = 0;
+
+ dest[0] = source[0];
+
+ for (size_t x = 1; x < size; ++x)
+ {
+ r = rev_next(r, size);
+
+ dest[x] = source[r];
+ }
+}
+
+void fft_recursive(unsigned int* data, size_t size, unsigned int prime, unsigned int root)
+{
+ size /= 2;
+
+ if (size > 1)
+ {
+ unsigned int new_root = modmul(root, root, prime);
+ fft_recursive(data, size, prime, new_root);
+ fft_recursive(data + size, size, prime, new_root);
+ }
+
+ unsigned int r = 1;
+
+ for (size_t i = 0; i < size; ++i)
+ {
+ unsigned int a = data[i];
+ unsigned int b = modmul(data[i + size], r, prime);
+
+ data[i] = modadd(a, b, prime);
+ data[i + size] = modsub(a, b, prime);
+
+ r = modmul(r, root, prime);
+ }
+}
+
+void fft(unsigned int* dest, const limb_t* source, size_t N, unsigned int prime, unsigned int root)
+{
+ fft_copy_reorder(dest, source, N);
+ fft_recursive(dest, N, prime, root);
+}
+
+void ift(unsigned int* dest, const unsigned int* source, size_t N, unsigned int prime, unsigned int root)
+{
+ unsigned int rroot = pow(root, prime - 2, prime);
+
+ fft_copy_reorder(dest, source, N);
+ fft_recursive(dest, N, prime, rroot);
+
+ unsigned int N_inv = pow(N, prime - 2, prime);
+
+ for (size_t i = 0; i < N; ++i)
+ {
+ dest[i] = modmul(dest[i], N_inv, prime);
+ }
+}
+
+void mul_fft(limb_t* dest, const limb_t* a, const limb_t* b)
+{
+ // p must be 1. prime, 2. of the form k*N+1, where N is higherPOT(length(a) + length(b)), and >= N*base^2
+ const size_t N = 16;
+
+ // N * base^2 == 1600
+ // prime0 * prime1 must be > 1600
+ const unsigned int primes[] = {70254593, 81788929};
+
+ // primitive root (power N)
+ const unsigned int roots[] = {3183438, 22821826};
+
+ unsigned int convs[2][N];
+
+ for (int p = 0; p < 2; ++p)
+ {
+ unsigned int fft_a[N], fft_b[N], fft_c[N];
+ fft(fft_a, a, N, primes[p], roots[p]);
+ fft(fft_b, b, N, primes[p], roots[p]);
+
+ for (size_t i = 0; i < N; ++i)
+ {
+ fft_c[i] = modmul(fft_a[i], fft_b[i], primes[p]);
+ }
+
+ ift(convs[p], fft_c, N, primes[p], roots[p]);
+ }
+
+ unsigned __int64 prime = static_cast<unsigned __int64>(primes[0]) * primes[1];
+ unsigned int inv_p0_mod_p1 = pow(primes[0], primes[1] - 2, primes[1]);
+
+ unsigned __int64 carry = 0;
+
+ for (int i = 0; i < N; ++i)
+ {
+ // 64 x == 32 c0 mod 32 p0
+ // 64 x == 32 c1 mod 32 p1
+
+ // x == p0*t + c0
+ // p0*t + c0 == c1 mod p1
+ // t == (c1 - c0) div p0 mod p1
+
+ // t == t0 mod p1
+
+ // x == p0 * t0 + c0
+ // t0 == (c1 - c0) div p0 mod p1
+
+ unsigned int t0 = modmul(modsub(convs[1][i], convs[0][i], primes[1]), inv_p0_mod_p1, primes[1]);
+ unsigned __int64 x = static_cast<unsigned __int64>(primes[0]) * t0 + convs[0][i];
+
+ carry += x;
+
+ dest[i] = carry & 0xffff;
+ carry >>= 16;
+ }
+}
+
+void mul_basecase(limb_t* dest, const limb_t* a, const limb_t* b)
+{
+ typedef unsigned int limb2_t;
+
+ const size_t N = 16;
+
+ memset(dest, 0, sizeof(limb_t) * N);
+
+ limb_t* i = dest;
+
+ for (const limb_t* li = a; li != a + N/2; ++li, ++i)
+ {
+ limb_t carry = 0;
+
+ limb_t* ci = i;
+
+ for (const limb_t* ri = b; ri != b + N/2; ++ri)
+ {
+ limb2_t result = static_cast<limb2_t>(*li) * *ri + *ci + carry;
+
+ *ci++ = static_cast<limb_t>(result & 0xffff);
+
+ carry = static_cast<limb_t>(result >> 16);
+ }
+
+ while (carry != 0)
+ {
+ limb2_t result = static_cast<limb2_t>(*ci) + carry;
+
+ *ci++ = static_cast<limb_t>(result & 0xffff);
+
+ carry = static_cast<limb_t>(result >> 16);
+ }
+ }
+}
+
+int main()
+{
+// limb_t a[16] = {65535, 65533, 65531, 65529, 65527, 65525, 65523, 65521};
+// limb_t b[16] = {65535, 65534, 65533, 65532, 65531, 65530, 65529, 65528};
+ limb_t a[16] = {65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535};
+ limb_t b[16] = {65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535};
+
+ limb_t c[16] = {};
+
+ P("A = ", a, 8);
+ P("B = ", b, 8);
+
+ int count = 10000;
+ LARGE_INTEGER start, end, freq;
+
+ QueryPerformanceFrequency(&freq);
+
+ QueryPerformanceCounter(&start);
+ for (int i = 0; i < count; ++i)
+ mul_fft(c, a, b);
+ QueryPerformanceCounter(&end);
+
+ P("Fast FT : C = ", c, 16); std::cout << (float)(end.QuadPart - start.QuadPart) / freq.QuadPart << std::endl;
+
+ QueryPerformanceCounter(&start);
+ for (int i = 0; i < count; ++i)
+ mul_basecase(c, a, b);
+ QueryPerformanceCounter(&end);
+
+ P("Basecase: C = ", c, 16); std::cout << (float)(end.QuadPart - start.QuadPart) / freq.QuadPart << std::endl;
+}
Added: sandbox/SOC/2007/bigint/fft/main_word_fast.cpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2007/bigint/fft/main_word_fast.cpp 2007-08-01 15:04:31 EDT (Wed, 01 Aug 2007)
@@ -0,0 +1,305 @@
+#include <iostream>
+
+#include <windows.h>
+
+#define __int64 long long
+
+#include "primes.h"
+
+const unsigned int base = 65536;
+typedef unsigned short limb_t;
+
+template <typename T> void P(const char* text, const T* data, size_t size)
+{
+ std::cout << text;
+ for (size_t i = 0; i < size; ++i) std::cout << " " << std::dec << data[i] << std::dec;
+ std::cout << std::endl;
+}
+
+unsigned int modmul(unsigned int a, unsigned int b, unsigned int prime)
+{
+ return static_cast<unsigned int>(static_cast<unsigned __int64>(a) * b % prime);
+}
+
+unsigned int modadd(unsigned int a, unsigned int b, unsigned int prime)
+{
+ return (a + b) % prime;
+}
+
+unsigned int modsub(unsigned int a, unsigned int b, unsigned int prime)
+{
+ return (a + prime - b) % prime;
+}
+
+unsigned int log2(unsigned int value)
+{
+ unsigned int r = 0;
+
+ while (value > 1)
+ {
+ ++r;
+ value >>= 1;
+ }
+
+ return r;
+}
+
+// reverse(x) -> reverse(x+1)
+size_t rev_next(size_t x, size_t n)
+{
+ do
+ {
+ n >>= 1;
+ x ^= n;
+ }
+ while ((x & n) == 0);
+
+ return x;
+}
+
+// swaps values with indices that have reversed bit representation (data[1100b] <-> data[0011b])
+template <typename T> void fft_copy_reorder(unsigned int* dest, const T* source, size_t size)
+{
+ if (size <= 2)
+ {
+ std::copy(source, source + size, dest);
+ return;
+ }
+
+ size_t r = 0;
+
+ dest[0] = source[0];
+
+ for (size_t x = 1; x < size; ++x)
+ {
+ r = rev_next(r, size);
+
+ dest[x] = source[r];
+ }
+}
+
+void fft_recursive(unsigned int* data, size_t size, unsigned int prime, const unsigned int* roots)
+{
+ size /= 2;
+
+ if (size > 1)
+ {
+ fft_recursive(data, size, prime, roots - 1);
+ fft_recursive(data + size, size, prime, roots - 1);
+ }
+
+ unsigned int root = *roots;
+ unsigned int r = 1;
+
+ for (size_t i = 0; i < size; ++i)
+ {
+ unsigned int a = data[i];
+ unsigned int b = modmul(data[i + size], r, prime);
+
+ data[i] = modadd(a, b, prime);
+ data[i + size] = modsub(a, b, prime);
+
+ r = modmul(r, root, prime);
+ }
+}
+
+void fft_iterative(unsigned int* data, size_t size, unsigned int prime, const unsigned int* roots)
+{
+ size_t step = 1;
+
+ unsigned int nstep = 0;
+
+ while (step < size)
+ {
+ unsigned int root = roots[++nstep];
+
+ size_t half_step = step;
+ step *= 2;
+
+ unsigned int r = 1;
+
+ for (size_t j = 0; j < half_step; ++j)
+ {
+ for (size_t i = j; i < size; i += step)
+ {
+ unsigned int a = data[i];
+ unsigned int b = modmul(data[i + half_step], r, prime);
+
+ data[i] = modadd(a, b, prime);
+ data[i + half_step] = modsub(a, b, prime);
+ }
+
+ r = modmul(r, root, prime);
+ }
+ }
+}
+
+void dft(unsigned int* dest, const limb_t* source, size_t N, unsigned int log2N, unsigned int prime, const unsigned int* root_table)
+{
+ fft_copy_reorder(dest, source, N);
+ fft_recursive(dest, N, prime, root_table + log2N);
+}
+
+void dft_i(unsigned int* dest, const limb_t* source, size_t N, unsigned int log2N, unsigned int prime, const unsigned int* root_table)
+{
+ fft_copy_reorder(dest, source, N);
+ fft_iterative(dest, N, prime, root_table);
+}
+
+void ift(unsigned int* dest, const unsigned int* source, size_t N, unsigned int log2N, unsigned int prime, const unsigned int* root_table, unsigned int inv_N)
+{
+ fft_copy_reorder(dest, source, N);
+ fft_recursive(dest, N, prime, root_table + log2N);
+
+ for (size_t i = 0; i < N; ++i)
+ {
+ dest[i] = modmul(dest[i], inv_N, prime);
+ }
+}
+
+void ift_i(unsigned int* dest, const unsigned int* source, size_t N, unsigned int log2N, unsigned int prime, const unsigned int* root_table, unsigned int inv_N)
+{
+ fft_copy_reorder(dest, source, N);
+ fft_iterative(dest, N, prime, root_table);
+
+ for (size_t i = 0; i < N; ++i)
+ {
+ dest[i] = modmul(dest[i], inv_N, prime);
+ }
+}
+
+void mul_fft(limb_t* dest, const limb_t* a, const limb_t* b,
+ void (*dft)(unsigned int* dest, const limb_t* source, size_t N, unsigned int log2N, unsigned int prime, const unsigned int* root_table),
+ void (*ift)(unsigned int* dest, const unsigned int* source, size_t N, unsigned int log2N, unsigned int prime, const unsigned int* root_table, unsigned int inv_N)
+ )
+{
+ // p must be 1. prime, 2. of the form k*N+1, where N is higherPOT(length(a) + length(b)), and >= N*base^2
+ const size_t N = 16;
+
+ unsigned int log2N = log2(N);
+
+ unsigned int convs[2][N];
+
+ for (int p = 0; p < 2; ++p)
+ {
+ const unsigned int* root_table = fft_primitive_roots[p];
+ unsigned int prime = fft_primes[p];
+
+ unsigned int fft_a[N], fft_b[N], fft_c[N];
+ dft(fft_a, a, N, log2N, prime, root_table);
+ dft(fft_b, b, N, log2N, prime, root_table);
+
+ for (size_t i = 0; i < N; ++i)
+ {
+ fft_c[i] = modmul(fft_a[i], fft_b[i], prime);
+ }
+
+ const unsigned int* inv_root_table = fft_inv_primitive_roots[p];
+ unsigned int inv_N = fft_inv_N[p][log2N];
+
+ ift(convs[p], fft_c, N, log2N, prime, inv_root_table, inv_N);
+ }
+
+ unsigned __int64 prime = static_cast<unsigned __int64>(fft_primes[0]) * fft_primes[1];
+ unsigned int inv_p0_mod_p1 = fft_inv_p0_mod_p1;
+
+ unsigned __int64 carry = 0;
+
+ for (int i = 0; i < N; ++i)
+ {
+ // 64 x == 32 c0 mod 32 p0
+ // 64 x == 32 c1 mod 32 p1
+
+ // x == p0*t + c0
+ // p0*t + c0 == c1 mod p1
+ // t == (c1 - c0) div p0 mod p1
+
+ // t == t0 mod p1
+
+ // x == p0 * t0 + c0
+ // t0 == (c1 - c0) div p0 mod p1
+
+ unsigned int t0 = modmul(modsub(convs[1][i], convs[0][i], fft_primes[1]), inv_p0_mod_p1, fft_primes[1]);
+ unsigned __int64 x = static_cast<unsigned __int64>(fft_primes[0]) * t0 + convs[0][i];
+
+ carry += x;
+
+ dest[i] = carry & 0xffff;
+ carry >>= 16;
+ }
+}
+
+void mul_basecase(limb_t* dest, const limb_t* a, const limb_t* b)
+{
+ typedef unsigned int limb2_t;
+
+ const size_t N = 16;
+
+ memset(dest, 0, sizeof(limb_t) * N);
+
+ limb_t* i = dest;
+
+ for (const limb_t* li = a; li != a + N/2; ++li, ++i)
+ {
+ limb_t carry = 0;
+
+ limb_t* ci = i;
+
+ for (const limb_t* ri = b; ri != b + N/2; ++ri)
+ {
+ limb2_t result = static_cast<limb2_t>(*li) * *ri + *ci + carry;
+
+ *ci++ = static_cast<limb_t>(result & 0xffff);
+
+ carry = static_cast<limb_t>(result >> 16);
+ }
+
+ while (carry != 0)
+ {
+ limb2_t result = static_cast<limb2_t>(*ci) + carry;
+
+ *ci++ = static_cast<limb_t>(result & 0xffff);
+
+ carry = static_cast<limb_t>(result >> 16);
+ }
+ }
+}
+
+int main()
+{
+// limb_t a[16] = {65535, 65533, 65531, 65529, 65527, 65525, 65523, 65521};
+// limb_t b[16] = {65535, 65534, 65533, 65532, 65531, 65530, 65529, 65528};
+ limb_t a[16] = {65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535};
+ limb_t b[16] = {65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535};
+
+ limb_t c[16] = {};
+
+ P("A = ", a, 8);
+ P("B = ", b, 8);
+
+ int count = 1000000;
+ LARGE_INTEGER start, end, freq;
+
+ QueryPerformanceFrequency(&freq);
+
+ QueryPerformanceCounter(&start);
+ for (int i = 0; i < count; ++i)
+ mul_fft(c, a, b, dft, ift);
+ QueryPerformanceCounter(&end);
+
+ P("Recursive: C = ", c, 16); std::cout << (float)(end.QuadPart - start.QuadPart) / freq.QuadPart << std::endl;
+
+ QueryPerformanceCounter(&start);
+ for (int i = 0; i < count; ++i)
+ mul_fft(c, a, b, dft_i, ift_i);
+ QueryPerformanceCounter(&end);
+
+ P("Iterative: C = ", c, 16); std::cout << (float)(end.QuadPart - start.QuadPart) / freq.QuadPart << std::endl;
+
+ QueryPerformanceCounter(&start);
+ for (int i = 0; i < count; ++i)
+ mul_basecase(c, a, b);
+ QueryPerformanceCounter(&end);
+
+ P("Basecase : C = ", c, 16); std::cout << (float)(end.QuadPart - start.QuadPart) / freq.QuadPart << std::endl;
+}
Added: sandbox/SOC/2007/bigint/fft/primes.cpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2007/bigint/fft/primes.cpp 2007-08-01 15:04:31 EDT (Wed, 01 Aug 2007)
@@ -0,0 +1,38 @@
+#include <iostream>
+#include <fstream>
+#include <algorithm>
+#include <vector>
+
+int main()
+{
+ const unsigned int size = 4294967294;
+
+ std::vector<unsigned int> primes;
+
+ for (unsigned int i = 2; i < size; ++i)
+ {
+ bool prime = true;
+
+ for (size_t j = 0; j < primes.size(); ++j)
+ {
+ if ((unsigned __int64)primes[j] * primes[j] > i) break;
+
+ if (i % primes[j] == 0)
+ {
+ prime = false;
+ break;
+ }
+ }
+
+ if (prime) primes.push_back(i);
+ }
+
+ std::cout << primes.size() << std::endl;
+
+ std::ofstream out("primes.txt");
+
+ for (size_t j = 0; j < primes.size(); ++j)
+ {
+ out << primes[j] << std::endl;
+ }
+}
Added: sandbox/SOC/2007/bigint/fft/primes_analyze.cpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2007/bigint/fft/primes_analyze.cpp 2007-08-01 15:04:31 EDT (Wed, 01 Aug 2007)
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <vector>
+#include <iostream>
+
+int main()
+{
+ FILE* f = fopen("primes.txt", "r");
+
+ unsigned int p = 0;
+
+ std::vector<unsigned int> powers[12];
+
+ while (fscanf(f, "%u", &p) >= 0)
+ {
+ p -= 1;
+
+ unsigned int power = 0;
+
+ while ((p & 1) == 0)
+ {
+ ++power;
+ p /= 2;
+ }
+
+ if (power >= 20)
+ {
+ powers[power - 20].push_back(p);
+ }
+ }
+
+ fclose(f);
+
+ std::cout << "Yo!" << std::endl;
+
+ for (unsigned int i = 0; i < 12; ++i)
+ {
+ std::cout << i << ":";
+
+ for (size_t j = 0; j < powers[i].size(); ++j)
+ {
+ std::cout << " " << powers[i][j];
+ }
+
+ std::cout << std::endl;
+ }
+}
Added: sandbox/SOC/2007/bigint/fft/roots.cpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2007/bigint/fft/roots.cpp 2007-08-01 15:04:31 EDT (Wed, 01 Aug 2007)
@@ -0,0 +1,155 @@
+#include <iostream>
+#include <algorithm>
+#include <vector>
+#include <stdio.h>
+
+unsigned int modmul(unsigned int a, unsigned int b, unsigned int prime)
+{
+ return static_cast<unsigned int>(static_cast<unsigned __int64>(a) * b % prime);
+}
+
+unsigned int pow(unsigned int base, unsigned int power, unsigned int prime)
+{
+ unsigned int pot = 1;
+
+ // Find largest power of two that is >= rhs
+ while (pot < power && (pot << 1) != 0)
+ pot <<= 1;
+
+ unsigned int res = 1;
+
+ while (pot > 0)
+ {
+ res = modmul(res, res, prime);
+
+ if ((power & pot) != 0)
+ {
+ res = modmul(res, base, prime);
+ }
+
+ pot >>= 1;
+ }
+
+ return res;
+}
+
+int main(int argc, char** argv)
+{
+ if (argc < 3) return 0;
+
+ unsigned int primes[2] = {atoi(argv[1]), atoi(argv[2])};
+
+ std::cout << "static const unsigned int fft_primes[] = {" << primes[0] << ", " << primes[1] << "};\n\n";
+
+ unsigned int powers[2];
+
+ for (int i = 0; i < 2; ++i)
+ {
+ // find power
+ unsigned int t = primes[i] - 1;
+
+ unsigned int power = 0;
+
+ while (t % 2 == 0)
+ {
+ ++power;
+ t /= 2;
+ }
+
+ std::cout << "// " << primes[i] << " = " << t << " * 2^" << power << " + 1\n";
+
+ powers[i] = power;
+ }
+
+ std::cout << std::endl;
+
+ unsigned int power_base = std::min(powers[0], powers[1]);
+ unsigned int power = 1 << power_base;
+
+ unsigned int roots[2];
+
+ // find primitive roots (for power)
+
+ for (int i = 0; i < 2; ++i)
+ {
+ unsigned int prime = primes[i];
+
+ for (unsigned int j = 1; j < prime; ++j)
+ {
+ // root?
+ if (pow(j, power, prime) == 1)
+ {
+ bool primitive = false;
+
+ __int64 r = 1;
+
+ for (unsigned int k = 0; k < power; ++k)
+ {
+ r *= j;
+ r %= prime;
+
+ if (r == 1)
+ {
+ primitive = (k == power - 1);
+ break;
+ }
+ }
+
+ if (primitive)
+ {
+ roots[i] = j;
+ break;
+ }
+ }
+ }
+ }
+
+ // now generate full suite
+ std::vector<unsigned int> all_roots[2];
+
+ for (int i = 0; i < 2; ++i)
+ all_roots[i].resize(power_base + 1);
+
+ for (int inv = 0; inv < 2; ++inv)
+ {
+ for (int i = 0; i < 2; ++i)
+ {
+ all_roots[i][power_base] = inv ? pow(roots[i], primes[i] - 2, primes[i]) : roots[i];
+
+ for (int j = power_base; j > 0; --j)
+ {
+ all_roots[i][j-1] = modmul(all_roots[i][j], all_roots[i][j], primes[i]);
+ }
+ }
+
+ std::cout << "static const unsigned int fft_" << (inv ? "inv_" : "") << "primitive_roots[2][" << power_base + 1 << "] =\n{\n";
+
+ for (int i = 0; i < 2; ++i)
+ {
+ std::cout << "\t{";
+
+ for (int j = 0; j <= power_base; ++j)
+ std::cout << all_roots[i][j] << (j == power_base ? "" : ", ");
+
+ std::cout << "}" << ("," + (i == 1)) << "\n";
+ }
+
+ std::cout << "};\n\n";
+ }
+
+ std::cout << "static const unsigned int fft_inv_N[2][" << power_base + 1 << "] =\n{\n";
+ for (int i = 0; i < 2; ++i)
+ {
+ std::cout << "\t{";
+
+ for (int j = 0; j <= power_base; ++j)
+ std::cout << pow(1 << (unsigned int)j, primes[i] - 2, primes[i]) << (j == power_base ? "" : ", ");
+
+ std::cout << "}" << ("," + (i == 1)) << "\n";
+ }
+ std::cout << "};\n\n";
+
+ unsigned int inv_p0_mod_p1 = pow(primes[0], primes[1] - 2, primes[1]);
+
+ std::cout << "const unsigned int fft_inv_p0_mod_p1 = " << inv_p0_mod_p1 << ";\n";
+}
Added: sandbox/SOC/2007/bigint/fft/sieve.cpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2007/bigint/fft/sieve.cpp 2007-08-01 15:04:31 EDT (Wed, 01 Aug 2007)
@@ -0,0 +1,42 @@
+#include <iostream>
+#include <fstream>
+#include <algorithm>
+
+int main()
+{
+ const size_t size = 100000000;
+
+ bool* sieve = new bool[size];
+
+ std::fill(sieve, sieve + size, true);
+
+ sieve[0] = false;
+ sieve[1] = false;
+
+ for (size_t i = 2; i < size; ++i)
+ {
+ if (i % (size / 10000) == 0) {
+ std::cout << (float)(i / (size / 10000)) / 100 << " %" << std::endl;
+ }
+
+ if (sieve[i])
+ {
+ for (size_t k = i + i; k < size; k += i)
+ {
+ sieve[k] = false;
+ }
+ }
+ }
+
+ std::ofstream out("primes.txt");
+
+ for (size_t i = 0; i < size; ++i)
+ {
+ if (sieve[i] && i % 1048576 == 1)
+ {
+ out << i << " ";
+ }
+ }
+
+ delete[] sieve;
+}
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk