Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r84288 - in trunk: boost/atomic/detail libs/atomic/test
From: tim_at_[hidden]
Date: 2013-05-15 02:12:47


Author: timblechmann
Date: 2013-05-15 02:12:46 EDT (Wed, 15 May 2013)
New Revision: 84288
URL: http://svn.boost.org/trac/boost/changeset/84288

Log:
atomic: Optimization for 64 bit loads and stores for 32 bit x86 on gcc

fixes #8574
Text files modified:
   trunk/boost/atomic/detail/gcc-x86.hpp | 97 +++++++++++++++++++++++++++++++++------
   trunk/libs/atomic/test/lockfree.cpp | 6 ++
   2 files changed, 87 insertions(+), 16 deletions(-)

Modified: trunk/boost/atomic/detail/gcc-x86.hpp
==============================================================================
--- trunk/boost/atomic/detail/gcc-x86.hpp (original)
+++ trunk/boost/atomic/detail/gcc-x86.hpp 2013-05-15 02:12:46 EDT (Wed, 15 May 2013)
@@ -28,6 +28,15 @@
 
 #define BOOST_ATOMIC_X86_PAUSE() __asm__ __volatile__ ("pause\n")
 
+#if defined(__i386__) &&\
+ (\
+ defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) ||\
+ defined(__i586__) || defined(__i686__) || defined(__pentium4__) || defined(__nocona__) || defined(__core2__) || defined(__corei7__) ||\
+ defined(__k6__) || defined(__athlon__) || defined(__k8__) || defined(__amdfam10__) || defined(__bdver1__) || defined(__bdver2__) || defined(__bdver3__) || defined(__btver1__) || defined(__btver2__)\
+ )
+#define BOOST_ATOMIC_X86_HAS_CMPXCHG8B 1
+#endif
+
 inline void
 platform_fence_before(memory_order order)
 {
@@ -198,10 +207,10 @@
 #define BOOST_ATOMIC_INT_LOCK_FREE 2
 #define BOOST_ATOMIC_LONG_LOCK_FREE 2
 
-#if defined(__x86_64__)
+#if defined(__x86_64__) || defined(BOOST_ATOMIC_X86_HAS_CMPXCHG8B)
 #define BOOST_ATOMIC_LLONG_LOCK_FREE 2
 #else
-#define BOOST_ATOMIC_LLONG_LOCK_FREE 1
+#define BOOST_ATOMIC_LLONG_LOCK_FREE 0
 #endif
 
 #define BOOST_ATOMIC_POINTER_LOCK_FREE 2
@@ -1618,11 +1627,11 @@
 };
 #endif
 
-#if !defined(__x86_64__) && (defined(__i686__) || defined (__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8))
+#if !defined(__x86_64__) && defined(BOOST_ATOMIC_X86_HAS_CMPXCHG8B)
 
 template<typename T>
 inline bool
-platform_cmpxchg64_strong(T & expected, T desired, volatile T * ptr)
+platform_cmpxchg64_strong(T & expected, T desired, volatile T * ptr) BOOST_NOEXCEPT
 {
 #ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
     const T oldval = __sync_val_compare_and_swap(ptr, expected, desired);
@@ -1630,7 +1639,7 @@
     expected = oldval;
     return result;
 #else
- int scratch;
+ uint32_t scratch;
     T prev = expected;
     /* Make sure ebx is saved and restored properly in case
     this object is compiled as "position independent". Since
@@ -1652,7 +1661,7 @@
         "lock; cmpxchg8b 0(%4)\n"
         "movl %1, %%ebx\n"
         : "=A" (prev), "=m" (scratch)
- : "D" ((int)desired), "c" ((int)(desired >> 32)), "S" (ptr), "0" (prev)
+ : "D" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), "S" (ptr), "0" (prev)
         : "memory");
     bool success = (prev == expected);
     expected = prev;
@@ -1660,14 +1669,47 @@
 #endif
 }
 
+// Intel 64 and IA-32 Architectures Software Developer's Manual, Volume 3A, 8.1.1. Guaranteed Atomic Operations:
+//
+// The Pentium processor (and newer processors since) guarantees that the following additional memory operations will always be carried out atomically:
+// * Reading or writing a quadword aligned on a 64-bit boundary
+//
+// Luckily, the memory is almost always 8-byte aligned in our case because atomic<> uses 64 bit native types for storage and dynamic memory allocations
+// have at least 8 byte alignment. The only unfortunate case is when atomic is placeod on the stack and it is not 8-byte aligned (like on 32 bit Windows).
+
 template<typename T>
 inline void
-platform_store64(T value, volatile T * ptr)
+platform_store64(T value, volatile T * ptr) BOOST_NOEXCEPT
 {
- T expected = *ptr;
- for (; !platform_cmpxchg64_strong(expected, value, ptr);)
+ if (((uint32_t)ptr & 0x00000007) == 0)
     {
- BOOST_ATOMIC_X86_PAUSE();
+#if defined(__SSE2__)
+ __asm__ __volatile__
+ (
+ "movq %1, %%xmm0\n\t"
+ "movq %%xmm0, %0\n\t"
+ : "=m" (*ptr)
+ : "m" (value)
+ : "memory", "xmm0"
+ );
+#else
+ __asm__ __volatile__
+ (
+ "fildll %1\n\t"
+ "fistpll %0\n\t"
+ : "=m" (*ptr)
+ : "m" (value)
+ : "memory"
+ );
+#endif
+ }
+ else
+ {
+ T expected = *ptr;
+ while (!platform_cmpxchg64_strong(expected, value, ptr))
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
     }
 }
 
@@ -1675,12 +1717,37 @@
 inline T
 platform_load64(const volatile T * ptr) BOOST_NOEXCEPT
 {
- T expected = *ptr;
- for (; !platform_cmpxchg64_strong(expected, expected, const_cast<volatile T*>(ptr));)
+ T value = T();
+
+ if (((uint32_t)ptr & 0x00000007) == 0)
+ {
+#if defined(__SSE2__)
+ __asm__ __volatile__
+ (
+ "movq %1, %%xmm0\n\t"
+ "movq %%xmm0, %0\n\t"
+ : "=m" (value)
+ : "m" (*ptr)
+ : "memory", "xmm0"
+ );
+#else
+ __asm__ __volatile__
+ (
+ "fildll %1\n\t"
+ "fistpll %0\n\t"
+ : "=m" (value)
+ : "m" (*ptr)
+ : "memory"
+ );
+#endif
+ }
+ else
     {
- BOOST_ATOMIC_X86_PAUSE();
+ // We don't care for comparison result here; the previous value will be stored into value anyway.
+ platform_cmpxchg64_strong(value, value, const_cast<volatile T*>(ptr));
     }
- return expected;
+
+ return value;
 }
 
 #endif
@@ -1690,7 +1757,7 @@
 }
 
 /* pull in 64-bit atomic type using cmpxchg8b above */
-#if !defined(__x86_64__) && (defined(__i686__) || defined (__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8))
+#if !defined(__x86_64__) && defined(BOOST_ATOMIC_X86_HAS_CMPXCHG8B)
 #include <boost/atomic/detail/cas64strong.hpp>
 #endif
 

Modified: trunk/libs/atomic/test/lockfree.cpp
==============================================================================
--- trunk/libs/atomic/test/lockfree.cpp (original)
+++ trunk/libs/atomic/test/lockfree.cpp 2013-05-15 02:12:46 EDT (Wed, 15 May 2013)
@@ -43,7 +43,11 @@
 #define EXPECT_SHORT_LOCK_FREE 2
 #define EXPECT_INT_LOCK_FREE 2
 #define EXPECT_LONG_LOCK_FREE 2
-#define EXPECT_LLONG_LOCK_FREE 1
+#if defined(BOOST_ATOMIC_X86_HAS_CMPXCHG8B)
+#define EXPECT_LLONG_LOCK_FREE 2
+#else
+#define EXPECT_LLONG_LOCK_FREE 0
+#endif
 #define EXPECT_POINTER_LOCK_FREE 2
 #define EXPECT_BOOL_LOCK_FREE 2
 


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk