Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r82096 - in sandbox/e_float: boost/e_float libs/e_float/build libs/e_float/src/e_float/efx
From: e_float_at_[hidden]
Date: 2012-12-19 07:53:38


Author: christopher_kormanyos
Date: 2012-12-19 07:53:37 EST (Wed, 19 Dec 2012)
New Revision: 82096
URL: http://svn.boost.org/trac/boost/changeset/82096

Log:
Begin prototyping of a naive FFT.
Text files modified:
   sandbox/e_float/boost/e_float/e_float_base.hpp | 5
   sandbox/e_float/boost/e_float/e_float_efx.hpp | 16 +
   sandbox/e_float/libs/e_float/build/e_float.vcxproj | 24 +-
   sandbox/e_float/libs/e_float/src/e_float/efx/e_float_efx.cpp | 311 +++++++++++++++++++++------------------
   4 files changed, 196 insertions(+), 160 deletions(-)

Modified: sandbox/e_float/boost/e_float/e_float_base.hpp
==============================================================================
--- sandbox/e_float/boost/e_float/e_float_base.hpp (original)
+++ sandbox/e_float/boost/e_float/e_float_base.hpp 2012-12-19 07:53:37 EST (Wed, 19 Dec 2012)
@@ -26,12 +26,9 @@
 
   // Select the number of decimal digits in e_float
   // by setting the value of E_FLOAT_DIGITS10.
- // The supported range is 30-300.
   // Note: This is a compile-time constant.
 
- #if !defined(E_FLOAT_DIGITS10)
- #define E_FLOAT_DIGITS10 50
- #endif
+ #define E_FLOAT_DIGITS10 100
 
   #if defined(E_FLOAT_TYPE_EFX)
     namespace efx { class e_float; }

Modified: sandbox/e_float/boost/e_float/e_float_efx.hpp
==============================================================================
--- sandbox/e_float/boost/e_float/e_float_efx.hpp (original)
+++ sandbox/e_float/boost/e_float/e_float_efx.hpp 2012-12-19 07:53:37 EST (Wed, 19 Dec 2012)
@@ -16,10 +16,24 @@
   #endif
 
   #include <cmath>
+ #include <vector>
   #include <string>
 
   #include <boost/e_float/e_float_base.hpp>
 
+ namespace ef_detail
+ {
+ template <class T, unsigned S>
+ struct dynamic_array : public std::vector<T>
+ {
+ dynamic_array()
+ : std::vector<T>(static_cast<typename std::vector<T>::size_type>(S), static_cast<T>(0)) {}
+
+ T* data() { return &(*this->begin()); }
+ const T* data()const { return &(*this->begin()); }
+ };
+ }
+
   namespace efx
   {
     class e_float : public ::e_float_base
@@ -49,7 +63,7 @@
 
       static const INT32 ef_elem_mask = static_cast<INT32>(100000000);
 
- typedef std::tr1::array<UINT32, static_cast<std::size_t>(ef_elem_number)> array_type;
+ typedef ef_detail::dynamic_array<UINT32, static_cast<std::size_t>(ef_elem_number)> array_type;
 
       array_type data;
       INT64 exp;

Modified: sandbox/e_float/libs/e_float/build/e_float.vcxproj
==============================================================================
--- sandbox/e_float/libs/e_float/build/e_float.vcxproj (original)
+++ sandbox/e_float/libs/e_float/build/e_float.vcxproj 2012-12-19 07:53:37 EST (Wed, 19 Dec 2012)
@@ -242,7 +242,7 @@
       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
       <AdditionalIncludeDirectories>$(SolutionDir)../../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;E_FLOAT_DIGITS10=50;NDEBUG;_CONSOLE;E_FLOAT_TYPE_GMP;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;E_FLOAT_TYPE_GMP;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -285,7 +285,7 @@
       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
       <AdditionalIncludeDirectories>$(SolutionDir)../../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;E_FLOAT_DIGITS10=50;NDEBUG;_CONSOLE;E_FLOAT_TYPE_GMP;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;E_FLOAT_TYPE_GMP;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -327,7 +327,7 @@
       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
       <AdditionalIncludeDirectories>$(SolutionDir)../../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;E_FLOAT_DIGITS10=50;NDEBUG;_CONSOLE;E_FLOAT_TYPE_EFX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;E_FLOAT_TYPE_EFX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -369,7 +369,7 @@
       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
       <AdditionalIncludeDirectories>$(SolutionDir)../../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;E_FLOAT_DIGITS10=50;NDEBUG;_CONSOLE;E_FLOAT_TYPE_EFX;_SECURE_SCL= 0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;E_FLOAT_TYPE_EFX;_SECURE_SCL= 0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -409,7 +409,7 @@
     <ClCompile>
       <Optimization>Disabled</Optimization>
       <AdditionalIncludeDirectories>$(SolutionDir)../../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;E_FLOAT_DIGITS10=50;_DEBUG;_CONSOLE;E_FLOAT_TYPE_GMP;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;E_FLOAT_TYPE_GMP;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <PrecompiledHeader>
@@ -456,7 +456,7 @@
     <ClCompile>
       <Optimization>Disabled</Optimization>
       <AdditionalIncludeDirectories>$(SolutionDir)../../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;E_FLOAT_DIGITS10=50;_DEBUG;_CONSOLE;E_FLOAT_TYPE_GMP;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;E_FLOAT_TYPE_GMP;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <PrecompiledHeader>
@@ -502,7 +502,7 @@
     <ClCompile>
       <Optimization>Disabled</Optimization>
       <AdditionalIncludeDirectories>$(SolutionDir)../../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;E_FLOAT_DIGITS10=50;_DEBUG;_CONSOLE;E_FLOAT_TYPE_EFX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;E_FLOAT_TYPE_EFX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <PrecompiledHeader>
@@ -547,7 +547,7 @@
     <ClCompile>
       <Optimization>Disabled</Optimization>
       <AdditionalIncludeDirectories>$(SolutionDir)../../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;E_FLOAT_DIGITS10=50;_DEBUG;_CONSOLE;E_FLOAT_TYPE_EFX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;E_FLOAT_TYPE_EFX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <PrecompiledHeader>
@@ -593,7 +593,7 @@
       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
       <AdditionalIncludeDirectories>$(SolutionDir)../../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;E_FLOAT_DIGITS10=50;NDEBUG;_CONSOLE;E_FLOAT_TYPE_MPFR;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;E_FLOAT_TYPE_MPFR;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -636,7 +636,7 @@
       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
       <AdditionalIncludeDirectories>$(SolutionDir)../../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;E_FLOAT_DIGITS10=50;NDEBUG;_CONSOLE;E_FLOAT_TYPE_MPFR;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;E_FLOAT_TYPE_MPFR;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -676,7 +676,7 @@
     <ClCompile>
       <Optimization>Disabled</Optimization>
       <AdditionalIncludeDirectories>$(SolutionDir)../../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;E_FLOAT_DIGITS10=50;_DEBUG;_CONSOLE;E_FLOAT_TYPE_MPFR;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;E_FLOAT_TYPE_MPFR;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <PrecompiledHeader>
@@ -724,7 +724,7 @@
     <ClCompile>
       <Optimization>Disabled</Optimization>
       <AdditionalIncludeDirectories>$(SolutionDir)../../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;E_FLOAT_DIGITS10=50;_DEBUG;_CONSOLE;E_FLOAT_TYPE_MPFR;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;E_FLOAT_TYPE_MPFR;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <PrecompiledHeader>

Modified: sandbox/e_float/libs/e_float/src/e_float/efx/e_float_efx.cpp
==============================================================================
--- sandbox/e_float/libs/e_float/src/e_float/efx/e_float_efx.cpp (original)
+++ sandbox/e_float/libs/e_float/src/e_float/efx/e_float_efx.cpp 2012-12-19 07:53:37 EST (Wed, 19 Dec 2012)
@@ -399,20 +399,17 @@
   return static_cast<UINT32>(carry);
 }
 
-template<const bool is_forward = true,
+template<const bool is_forward_fft = true,
          typename float_type = double>
 struct fft_lanczos
 {
   static void fft(const UINT32 n, float_type* f)
   {
- UINT32 nn = n << 1;
- UINT32 mmax;
- UINT32 m;
+ const UINT32 nn = n << 1;
+
     UINT32 j = 1U;
- UINT32 istep;
- UINT32 i;
 
- for(i = 1U; i < nn; i += 2U)
+ for(UINT32 i = 1U; i < nn; i += 2U)
     {
       if(j > i)
       {
@@ -420,7 +417,7 @@
         std::swap(f[j], f[i]);
       }
 
- m = n;
+ UINT32 m = n;
 
       while((m >= 2U) && (j > m))
       {
@@ -431,193 +428,215 @@
       j += m;
     }
 
- mmax = 2U;
+ UINT32 m_max = 2U;
 
- while(nn > mmax)
+ while(nn > m_max)
     {
- istep = mmax << 1;
-
- float_type theta = (is_forward ? +float_type(6.2831853071795864769252867665590057683943) / mmax
- : -float_type(6.2831853071795864769252867665590057683943) / mmax);
+ float_type theta = (is_forward_fft ? +float_type(float_type(6.2831853071795864769252867665590057683943) / m_max)
+ : -float_type(float_type(6.2831853071795864769252867665590057683943) / m_max));
 
- float_type wtemp = ::sin(float_type(0.5) * theta);
- float_type wpr = -2 * (wtemp * wtemp);
+ float_type w_tmp = ::sin(float_type(0.5) * theta);
+ float_type wpr = -2 * (w_tmp * w_tmp);
       float_type wpi = ::sin(theta);
- float_type wr(1);
- float_type wi(0);
 
- for(m = 1U; m < mmax; m += 2U)
+ float_type wr(1.0);
+ float_type wi(0.0);
+
+ const UINT32 istep = m_max << 1;
+
+ for(UINT32 m = 1U; m < m_max; m += 2U)
       {
- for(i = m; i <= nn; i += istep)
+ for(UINT32 i = m; i <= nn; i += istep)
         {
- j = i + mmax;
- float_type tempr = wr * f[j -1U] - wi * f[j];
- float_type tempi = wr * f[j] + wi * f[j - 1U];
- f[j - 1U] = f[i - 1U] - tempr;
- f[j] = f[i] - tempi;
- f[i - 1U] += tempr;
- f[i] += tempi;
+ const UINT32 i_max = i + m_max;
+
+ const float_type tmp_r = (wr * f[i_max - 1U]) - (wi * f[i_max]);
+ const float_type tmp_i = (wr * f[i_max]) + (wi * f[i_max - 1U]);
+
+ f[i_max - 1U] = f[i - 1U] - tmp_r;
+ f[i_max] = f[i] - tmp_i;
+
+ f[i - 1U] += tmp_r;
+ f[i] += tmp_i;
         }
 
- wr = ((wtemp = wr) * wpr) - (wi * wpi) + wr;
- wi = (wi * wpr) + (wtemp * wpi) + wi;
+ w_tmp = wr;
+
+ wr = ((w_tmp * wpr) - (wi * wpi)) + wr;
+ wi = ((wi * wpr) + (w_tmp * wpi)) + wi;
       }
 
- mmax = istep;
+ m_max = istep;
     }
   }
 };
 
-template<typename float_type = double>
-struct rfft_lanczos
+namespace
 {
- static void rfft(const UINT32 n, float_type* f, const bool is_forward = true)
+ template<typename float_type = double>
+ struct rfft_lanczos
   {
- UINT32 i1;
- UINT32 i2;
- UINT32 i3;
- UINT32 i4;
- UINT32 i;
+ static void rfft(const UINT32 n, float_type* f, const bool is_forward = true)
+ {
+ float_type theta = float_type(float_type(3.1415926535897932384626433832795028841972) / (n >> 1));
 
- float_type c1(0.5);
- float_type c2;
+ float_type c1(0.5);
+ float_type c2;
 
- float_type h1r;
+ if(is_forward)
+ {
+ c2 = float_type(-0.5);
+ fft_lanczos<true, float_type>::fft(static_cast<UINT32>(n / 2U), f);
+ }
+ else
+ {
+ c2 = float_type(0.5);
+ theta = -theta;
+ }
 
- float_type theta = float_type(3.1415926535897932384626433832795028841972) / (n >> 1);
+ const float_type sin_half_theta = ::sin(float_type(0.5) * theta);
+ const float_type wpr = -2 * (sin_half_theta * sin_half_theta);
+ const float_type wpi = ::sin(theta);
 
- if(is_forward)
- {
- c2 = float_type(-0.5);
- fft_lanczos<true, float_type>::fft(n / 2, f);
- }
- else
- {
- c2 = float_type(0.5);
- theta = -theta;
- }
+ float_type wr = float_type(1.0) + wpr;
+ float_type wi = wpi;
 
- float_type wtemp = ::sin(float_type(0.5) * theta);
- float_type wpr = -2 * (wtemp * wtemp);
- float_type wpi = ::sin(theta);
- float_type wr = 1 + wpr;
- float_type wi = wpi;
+ for(UINT32 i = static_cast<UINT32>(1U); i < static_cast<UINT32>(n >> 2); ++i)
+ {
+ const UINT32 i1 = static_cast<UINT32>(i + i);
+ const UINT32 i3 = static_cast<UINT32>(n - i1);
 
- for(i = 1U; i < (n >> 2); ++i)
- {
- i2 = 1 + (i1 = i + i);
- i4 = 1 + (i3 = n - i1);
+ const UINT32 i2 = static_cast<UINT32>(1U + i1);
+ const UINT32 i4 = static_cast<UINT32>(1U + i3);
 
- float_type h1r = c1 * (f[i1] + f[i3]);
- float_type h1i = c1 * (f[i2] - f[i4]);
+ const float_type h1r = c1 * (f[i1] + f[i3]);
+ const float_type h1i = c1 * (f[i2] - f[i4]);
 
- float_type h2r = -c2 * (f[i2] + f[i4]);
- float_type h2i = c2 * (f[i1] - f[i3]);
+ const float_type h2r = -c2 * (f[i2] + f[i4]);
+ const float_type h2i = c2 * (f[i1] - f[i3]);
 
- f[i1] = h1r + wr * h2r - wi * h2i;
- f[i2] = h1i + wr * h2i + wi * h2r;
- f[i3] = h1r - wr * h2r + wi * h2i;
- f[i4] = -h1i + wr * h2i + wi * h2r;
+ f[i1] = ( h1r + (wr * h2r)) - (wi * h2i);
+ f[i2] = ( h1i + (wr * h2i)) + (wi * h2r);
+ f[i3] = ( h1r - (wr * h2r)) + (wi * h2i);
+ f[i4] = (-h1i + (wr * h2i)) + (wi * h2r);
 
- wr = ((wtemp = wr) * wpr) - wi * wpi + wr;
- wi = wi * wpr + wtemp * wpi + wi;
- }
+ const float_type w_tmp = wr;
 
- if(is_forward)
- {
- f[0U] = (h1r = f[0U]) + f[1U];
- f[1U] = h1r - f[1U];
- }
- else
- {
- f[0U] = c1 * ((h1r = f[0U]) + f[1U]);
- f[1U] = c1 * (h1r - f[1U]);
- fft_lanczos<false, float_type>::fft(n / 2, f);
+ wr = ((w_tmp * wpr) - (wi * wpi)) + wr;
+ wi = ((wi * wpr) + (w_tmp * wpi)) + wi;
+ }
+
+ const float_type f0_tmp = f[0U];
+
+ if(is_forward)
+ {
+ f[0U] = f0_tmp + f[1U];
+ f[1U] = f0_tmp - f[1U];
+ }
+ else
+ {
+ f[0U] = c1 * (f0_tmp + f[1U]);
+ f[1U] = c1 * (f0_tmp - f[1U]);
+
+ fft_lanczos<false, float_type>::fft(static_cast<UINT32>(n / 2U), f);
+ }
     }
- }
-};
+ };
+}
 
 void efx::e_float::mul_loop_fft(UINT32* const u, const UINT32* const v, const INT32 p)
 {
- // Determine the required FFT size (p).
- UINT32 n_fft = 1U;
+ // Determine the required FFT size,
+ // where n_fft is constrained to be a power of two.
+ UINT32 n_fft = 1U;
 
- for(unsigned i = 1; i < 31U; ++i)
- {
- n_fft <<= 1U;
+ for(unsigned i = 1U; i < 31U; ++i)
+ {
+ n_fft <<= 1U;
 
- if(n_fft >= (p * 2U))
- {
- break;
- }
+ // We now have the needed size (doubled).
+ // The size is doubled in order to contain the multiplication result.
+ if(n_fft >= static_cast<UINT32>(p * 2U))
+ {
+ break;
     }
+ }
 
- n_fft <<= 1;
+ // Again, double the FFT size in order to contain the
+ // complex-numbered array result format.
+ n_fft <<= 1U;
 
- double* af = new double[n_fft];
- double* bf = new double[n_fft];
+ // Allocate dynamic memory for the FFT result arrays.
+ double* af = new double[n_fft];
+ double* bf = new double[n_fft];
 
- for(UINT32 i = 0U; i < static_cast<UINT32>(p); ++i)
- {
- af[(i * 2U)] = (u[i] / 10000U);
- af[(i * 2U) + 1U] = (u[i] % 10000U);
+ for(UINT32 i = static_cast<UINT32>(0U); i < static_cast<UINT32>(p); ++i)
+ {
+ af[(i * 2U)] = (u[i] / 10000U);
+ af[(i * 2U) + 1U] = (u[i] % 10000U);
 
- bf[(i * 2U)] = (v[i] / 10000U);
- bf[(i * 2U) + 1U] = (v[i] % 10000U);
- }
+ bf[(i * 2U)] = (v[i] / 10000U);
+ bf[(i * 2U) + 1U] = (v[i] % 10000U);
+ }
 
- std::fill(af + (p * 2U), af + n_fft, double(0));
- std::fill(bf + (p * 2U), bf + n_fft, double(0));
+ std::fill(af + (p * 2U), af + n_fft, double(0));
+ std::fill(bf + (p * 2U), bf + n_fft, double(0));
 
- rfft_lanczos<>::rfft(n_fft, af);
- rfft_lanczos<>::rfft(n_fft, bf);
+ rfft_lanczos<>::rfft(n_fft, af);
+ rfft_lanczos<>::rfft(n_fft, bf);
 
- af[0U] *= bf[0U];
- af[1U] *= bf[1U];
+ // Do the convolution in the transform space.
+ // This is the multiplication of (a * b).
+ af[0U] *= bf[0U];
+ af[1U] *= bf[1U];
 
- for(UINT32 j = 2U; j < n_fft; j += 2U)
- {
- double t;
- af[j] = ((t = af[j]) * bf[j]) - (af[j + 1U] * bf[j + 1U]);
- af[j + 1U] = (t * bf[j + 1U]) + (af[j + 1U] * bf[j]);
- }
+ for(UINT32 j = static_cast<UINT32>(2U); j < n_fft; j += 2U)
+ {
+ const double tmp_aj = af[j];
 
- rfft_lanczos<>::rfft(n_fft, af, false);
+ af[j] = (tmp_aj * bf[j]) - (af[j + 1U] * bf[j + 1U]);
+ af[j + 1U] = (tmp_aj * bf[j + 1U]) + (af[j + 1U] * bf[j]);
+ }
 
- // Release the carries, re-combine the lo and hi parts and set the data elements.
- UINT64 carry = static_cast<UINT64>(0u);
+ // Perform the reverse transformation.
+ rfft_lanczos<>::rfft(n_fft, af, false);
 
- for(UINT32 j = ((p * 2U) - 2U); static_cast<INT32>(j) >= static_cast<INT32>(0); j -= 2U)
- {
- double xaj = af[j] / (n_fft / 2);
- const UINT64 xlo = static_cast<UINT64>(xaj + 0.5) + carry;
- carry = static_cast<UINT64>(xlo / static_cast<UINT32>(10000));
- const UINT32 nlo = static_cast<UINT32>(xlo - static_cast<UINT64>(carry * static_cast<UINT32>(10000)));
+ // Release the carries.
+ // Re-combine the low and high parts.
+ // And set the data elements.
+ UINT64 carry = static_cast<UINT64>(0U);
 
- xaj = ((j != static_cast<INT32>(0)) ? (af[j - 1u] / (n_fft / 2)) : 0.0);
- const UINT64 xhi = static_cast<UINT64>(xaj + 0.5) + carry;
- carry = static_cast<UINT64>(xhi / static_cast<UINT32>(10000));
- const UINT32 nhi = static_cast<UINT32>(xhi - static_cast<UINT64>(carry * static_cast<UINT32>(10000)));
+ for(UINT32 j = static_cast<UINT32>((p * 2U) - 2U); static_cast<INT32>(j) >= static_cast<INT32>(0); j -= 2U)
+ {
+ double xaj = af[j] / (n_fft / 2);
+ const UINT64 xlo = static_cast<UINT64>(xaj + 0.5) + carry;
+ carry = static_cast<UINT64>(xlo / static_cast<UINT32>(10000U));
+ const UINT32 nlo = static_cast<UINT32>(xlo - static_cast<UINT64>(carry * static_cast<UINT32>(10000U)));
 
- u[(j / 2u)] = static_cast<UINT32>(static_cast<UINT32>(nhi * static_cast<UINT32>(10000)) + nlo);
- }
+ xaj = ((j != static_cast<INT32>(0)) ? (af[j - 1u] / (n_fft / 2)) : 0.0);
+ const UINT64 xhi = static_cast<UINT64>(xaj + 0.5) + carry;
+ carry = static_cast<UINT64>(xhi / static_cast<UINT32>(10000U));
+ const UINT32 nhi = static_cast<UINT32>(xhi - static_cast<UINT64>(carry * static_cast<UINT32>(10000U)));
+
+ u[(j / 2U)] = static_cast<UINT32>(static_cast<UINT32>(nhi * static_cast<UINT32>(10000U)) + nlo);
+ }
 
- delete [] af;
- delete [] bf;
+ delete [] af;
+ delete [] bf;
 }
 
 UINT32 efx::e_float::mul_loop_n(UINT32* const u, UINT32 n, const INT32 p)
 {
- UINT64 carry = static_cast<UINT64>(0u);
+ UINT64 carry = static_cast<UINT64>(0U);
 
   // Multiplication loop.
- for(INT32 j = p - 1; j >= static_cast<INT32>(0); j--)
+ for(INT32 j = static_cast<INT32>(p - 1); j >= static_cast<INT32>(0); j--)
   {
     const UINT64 t = static_cast<UINT64>(carry + static_cast<UINT64>(u[j] * static_cast<UINT64>(n)));
     carry = static_cast<UINT64>(t / static_cast<UINT32>(ef_elem_mask));
     u[j] = static_cast<UINT32>(t - static_cast<UINT64>(static_cast<UINT32>(ef_elem_mask) * static_cast<UINT64>(carry)));
   }
-
+
   return static_cast<UINT32>(carry);
 }
 
@@ -937,14 +956,16 @@
   // Set the exponent of the result.
   exp += v.exp;
 
- const INT32 prec_mul = (std::min)(prec_elem, v.prec_elem);
+ const INT32 prec_for_multiply = (std::min)(prec_elem, v.prec_elem);
+ const INT32 digits10_for_multiply = static_cast<INT32>(prec_for_multiply * ef_elem_digits10);
 
- if(prec_mul < 1000)
+ if(digits10_for_multiply < static_cast<INT32>(3000))
   {
- const UINT32 carry = mul_loop_uv(data.data(), v.data.data(), prec_mul);
+ // Use school multiplication.
+ const UINT32 carry = mul_loop_uv(data.data(), v.data.data(), prec_for_multiply);
 
     // Handle a potential carry.
- if(carry != static_cast<UINT32>(0u))
+ if(carry != static_cast<UINT32>(0U))
     {
       exp += static_cast<INT64>(ef_elem_digits10);
 
@@ -959,7 +980,7 @@
   else
   {
     // Use FFT-based multiplication.
- mul_loop_fft(data.data(), v.data.data(), static_cast<INT32>(prec_mul));
+ mul_loop_fft(data.data(), v.data.data(), static_cast<INT32>(prec_for_multiply));
 
     // Adjust the exponent because of the internal scaling of the FFT multiplication.
     exp += static_cast<INT64>(ef_elem_digits10);
@@ -1209,6 +1230,8 @@
   INT64 ne;
   x.extract_parts(dd, ne);
 
+ const INT32 original_prec_elem = prec_elem;
+
   // Do the inverse estimate using double precision estimates of mantissa and exponent.
   operator=(e_float(1.0 / dd, -ne));
 
@@ -1230,7 +1253,7 @@
 
   neg = b_neg;
 
- prec_elem = ef_elem_number;
+ prec_elem = original_prec_elem;
 
   return *this;
 }
@@ -1270,6 +1293,8 @@
   // Estimate the square root using simple manipulations.
   const double sqd = ::sqrt(dd);
 
+ const INT32 original_prec_elem = prec_elem;
+
   *this = e_float(sqd, static_cast<INT64>(ne / static_cast<INT64>(2)));
 
   // Estimate 1.0 / (2.0 * x0) using simple manipulations.
@@ -1297,10 +1322,10 @@
     vi += vi * (-((*this * vi) * static_cast<INT32>(2)) + ef::one());
 
     // Next iteration of *this
- *this += vi * (-(*this * *this) + x);
+ *this += vi * (-((*this) * (*this)) + x);
   }
 
- prec_elem = ef_elem_number;
+ prec_elem = original_prec_elem;
 
   return *this;
 }


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk