Subject: [Boost-commit] svn:boost r82154 - in branches/release/boost/atomic: . detail
From: andrey.semashev_at_[hidden]
Date: 2012-12-21 15:14:11


Author: andysem
Date: 2012-12-21 15:14:10 EST (Fri, 21 Dec 2012)
New Revision: 82154
URL: http://svn.boost.org/trac/boost/changeset/82154

Log:
Removed unnecessary fences and added pause operations to tight spin loops, reducing power consumption and improving performance on HT-enabled systems (a sketch of the new spin-loop pattern follows the file list below).
Properties modified:
   branches/release/boost/atomic/ (props changed)
Text files modified:
   branches/release/boost/atomic/detail/gcc-armv6plus.hpp | 2
   branches/release/boost/atomic/detail/gcc-x86.hpp | 277 +++++++++++++++++++++---------------
   branches/release/boost/atomic/detail/lockpool.hpp | 8
   branches/release/boost/atomic/detail/windows.hpp | 300 +++++++++------------------------------
   4 files changed, 245 insertions(+), 342 deletions(-)
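
The core of the change is the retry-loop pattern: every failed
compare_exchange_weak attempt now executes a "pause" instruction, which
tells the CPU the thread is spin-waiting, freeing execution resources for
a sibling hyper-thread and reducing power draw. A minimal standalone
sketch of the pattern, assuming Boost.Atomic; spin_pause and
fetch_and_sketch are illustrative names, not Boost internals:

    #include <boost/atomic.hpp>

    inline void spin_pause()
    {
    #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
        __asm__ __volatile__ ("pause\n"); // spin-wait hint; omitted elsewhere
    #endif
    }

    template<typename T>
    T fetch_and_sketch(boost::atomic<T>& a, T v)
    {
        T tmp = a.load(boost::memory_order_relaxed);
        // Previously: do {} while (!a.compare_exchange_weak(...));
        while (!a.compare_exchange_weak(tmp, tmp & v,
                                        boost::memory_order_seq_cst,
                                        boost::memory_order_relaxed))
        {
            spin_pause(); // issued on each failed CAS attempt
        }
        return tmp; // value observed before the AND took effect
    }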

Modified: branches/release/boost/atomic/detail/gcc-armv6plus.hpp
==============================================================================
--- branches/release/boost/atomic/detail/gcc-armv6plus.hpp (original)
+++ branches/release/boost/atomic/detail/gcc-armv6plus.hpp 2012-12-21 15:14:10 EST (Fri, 21 Dec 2012)
@@ -57,7 +57,7 @@
 // indicates the mode, so 1 is added to indicate that the destination code is Thumb.
 // A temporary register is needed for the address and is passed as an argument to these
 // macros. It must be one of the "low" registers accessible to Thumb code, specified
-// usng the "l" attribute in the asm statement.
+// using the "l" attribute in the asm statement.
 //
 // Architecture v7 introduces "Thumb 2", which does include (almost?) all of the ARM
 // instruction set. So in v7 we don't need to change to ARM mode; we can write "universal

Modified: branches/release/boost/atomic/detail/gcc-x86.hpp
==============================================================================
--- branches/release/boost/atomic/detail/gcc-x86.hpp (original)
+++ branches/release/boost/atomic/detail/gcc-x86.hpp 2012-12-21 15:14:10 EST (Fri, 21 Dec 2012)
@@ -20,128 +20,138 @@
 namespace atomics {
 namespace detail {
 
-#if defined(__x86_64__)
+#if defined(__x86_64__) || defined(__SSE2__)
 # define BOOST_ATOMIC_X86_FENCE_INSTR "mfence\n"
 #else
 # define BOOST_ATOMIC_X86_FENCE_INSTR "lock ; addl $0, (%%esp)\n"
 #endif
 
+#define BOOST_ATOMIC_X86_PAUSE() __asm__ __volatile__ ("pause\n")
+
 inline void
 platform_fence_before(memory_order order)
 {
- switch(order) {
- case memory_order_relaxed:
- case memory_order_acquire:
- case memory_order_consume:
- break;
- case memory_order_release:
- case memory_order_acq_rel:
- __asm__ __volatile__ ("" ::: "memory");
- /* release */
- break;
- case memory_order_seq_cst:
- __asm__ __volatile__ ("" ::: "memory");
- /* seq */
- break;
+ switch(order)
+ {
+ case memory_order_relaxed:
+ case memory_order_acquire:
+ case memory_order_consume:
+ break;
+ case memory_order_release:
+ case memory_order_acq_rel:
+ __asm__ __volatile__ ("" ::: "memory");
+ /* release */
+ break;
+ case memory_order_seq_cst:
+ __asm__ __volatile__ ("" ::: "memory");
+ /* seq */
+ break;
+ default:;
     }
 }
 
 inline void
 platform_fence_after(memory_order order)
 {
- switch(order) {
- case memory_order_relaxed:
- case memory_order_release:
- break;
- case memory_order_acquire:
- case memory_order_acq_rel:
- __asm__ __volatile__ ("" ::: "memory");
- /* acquire */
- break;
- case memory_order_consume:
- /* consume */
- break;
- case memory_order_seq_cst:
- __asm__ __volatile__ ("" ::: "memory");
- /* seq */
- break;
- default:;
+ switch(order)
+ {
+ case memory_order_relaxed:
+ case memory_order_release:
+ break;
+ case memory_order_acquire:
+ case memory_order_acq_rel:
+ __asm__ __volatile__ ("" ::: "memory");
+ /* acquire */
+ break;
+ case memory_order_consume:
+ /* consume */
+ break;
+ case memory_order_seq_cst:
+ __asm__ __volatile__ ("" ::: "memory");
+ /* seq */
+ break;
+ default:;
     }
 }
 
 inline void
 platform_fence_after_load(memory_order order)
 {
- switch(order) {
- case memory_order_relaxed:
- case memory_order_release:
- break;
- case memory_order_acquire:
- case memory_order_acq_rel:
- __asm__ __volatile__ ("" ::: "memory");
- break;
- case memory_order_consume:
- break;
- case memory_order_seq_cst:
- __asm__ __volatile__ (BOOST_ATOMIC_X86_FENCE_INSTR ::: "memory");
- break;
- default:;
+ switch(order)
+ {
+ case memory_order_relaxed:
+ case memory_order_release:
+ break;
+ case memory_order_acquire:
+ case memory_order_acq_rel:
+ __asm__ __volatile__ ("" ::: "memory");
+ break;
+ case memory_order_consume:
+ break;
+ case memory_order_seq_cst:
+ __asm__ __volatile__ ("" ::: "memory");
+ break;
+ default:;
     }
 }
 
 inline void
 platform_fence_before_store(memory_order order)
 {
- switch(order) {
- case memory_order_relaxed:
- case memory_order_acquire:
- case memory_order_consume:
- break;
- case memory_order_release:
- case memory_order_acq_rel:
- __asm__ __volatile__ ("" ::: "memory");
- /* release */
- break;
- case memory_order_seq_cst:
- __asm__ __volatile__ ("" ::: "memory");
- /* seq */
- break;
+ switch(order)
+ {
+ case memory_order_relaxed:
+ case memory_order_acquire:
+ case memory_order_consume:
+ break;
+ case memory_order_release:
+ case memory_order_acq_rel:
+ __asm__ __volatile__ ("" ::: "memory");
+ /* release */
+ break;
+ case memory_order_seq_cst:
+ __asm__ __volatile__ ("" ::: "memory");
+ /* seq */
+ break;
+ default:;
     }
 }
 
 inline void
 platform_fence_after_store(memory_order order)
 {
- switch(order) {
- case memory_order_relaxed:
- case memory_order_release:
- break;
- case memory_order_acquire:
- case memory_order_acq_rel:
- __asm__ __volatile__ ("" ::: "memory");
- /* acquire */
- break;
- case memory_order_consume:
- /* consume */
- break;
- case memory_order_seq_cst:
- __asm__ __volatile__ ("" ::: "memory");
- /* seq */
- break;
- default:;
+ switch(order)
+ {
+ case memory_order_relaxed:
+ case memory_order_release:
+ break;
+ case memory_order_acquire:
+ case memory_order_acq_rel:
+ __asm__ __volatile__ ("" ::: "memory");
+ /* acquire */
+ break;
+ case memory_order_consume:
+ /* consume */
+ break;
+ case memory_order_seq_cst:
+ __asm__ __volatile__ ("" ::: "memory");
+ /* seq */
+ break;
+ default:;
     }
 }
 
 }
 }
 
-class atomic_flag {
+class atomic_flag
+{
 private:
     atomic_flag(const atomic_flag &) /* = delete */ ;
     atomic_flag & operator=(const atomic_flag &) /* = delete */ ;
     uint32_t v_;
 public:
- atomic_flag(void) : v_(false) {}
+ atomic_flag(void) : v_(0) {}
 
     bool
     test_and_set(memory_order order = memory_order_seq_cst) volatile
@@ -203,24 +213,25 @@
 inline void
 atomic_thread_fence(memory_order order)
 {
- switch(order) {
- case memory_order_relaxed:
- break;
- case memory_order_release:
- __asm__ __volatile__ ("" ::: "memory");
- break;
- case memory_order_acquire:
- __asm__ __volatile__ ("" ::: "memory");
- break;
- case memory_order_acq_rel:
- __asm__ __volatile__ ("" ::: "memory");
- break;
- case memory_order_consume:
- break;
- case memory_order_seq_cst:
- __asm__ __volatile__ (BOOST_ATOMIC_X86_FENCE_INSTR ::: "memory");
- break;
- default:;
+ switch(order)
+ {
+ case memory_order_relaxed:
+ break;
+ case memory_order_release:
+ __asm__ __volatile__ ("" ::: "memory");
+ break;
+ case memory_order_acquire:
+ __asm__ __volatile__ ("" ::: "memory");
+ break;
+ case memory_order_acq_rel:
+ __asm__ __volatile__ ("" ::: "memory");
+ break;
+ case memory_order_consume:
+ break;
+ case memory_order_seq_cst:
+ __asm__ __volatile__ (BOOST_ATOMIC_X86_FENCE_INSTR ::: "memory");
+ break;
+ default:;
     }
 }
 
@@ -329,7 +340,10 @@
     fetch_and(value_type v, memory_order order = memory_order_seq_cst) volatile
     {
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed));
+ for(; !compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
     }
 
@@ -337,7 +351,10 @@
     fetch_or(value_type v, memory_order order = memory_order_seq_cst) volatile
     {
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
     }
 
@@ -345,7 +362,10 @@
     fetch_xor(value_type v, memory_order order = memory_order_seq_cst) volatile
     {
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
     }
 
@@ -457,7 +477,10 @@
     fetch_and(value_type v, memory_order order = memory_order_seq_cst) volatile
     {
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
     }
 
@@ -465,7 +488,10 @@
     fetch_or(value_type v, memory_order order = memory_order_seq_cst) volatile
     {
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
     }
 
@@ -473,7 +499,10 @@
     fetch_xor(value_type v, memory_order order = memory_order_seq_cst) volatile
     {
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
     }
 
@@ -585,7 +614,10 @@
     fetch_and(value_type v, memory_order order = memory_order_seq_cst) volatile
     {
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
     }
 
@@ -593,7 +625,10 @@
     fetch_or(value_type v, memory_order order = memory_order_seq_cst) volatile
     {
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
     }
 
@@ -601,7 +636,10 @@
     fetch_xor(value_type v, memory_order order = memory_order_seq_cst) volatile
     {
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
     }
 
@@ -714,7 +752,10 @@
     fetch_and(value_type v, memory_order order = memory_order_seq_cst) volatile
     {
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
     }
 
@@ -722,7 +763,10 @@
     fetch_or(value_type v, memory_order order = memory_order_seq_cst) volatile
     {
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
     }
 
@@ -730,7 +774,10 @@
     fetch_xor(value_type v, memory_order order = memory_order_seq_cst) volatile
     {
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
     }
 
@@ -1570,8 +1617,10 @@
 platform_store64(T value, volatile T * ptr)
 {
     T expected = *ptr;
- do {
- } while (!platform_cmpxchg64_strong(expected, value, ptr));
+ for (; !platform_cmpxchg64_strong(expected, value, ptr);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
 }
 
 template<typename T>
@@ -1579,8 +1628,10 @@
 platform_load64(const volatile T * ptr)
 {
     T expected = *ptr;
- do {
- } while (!platform_cmpxchg64_strong(expected, expected, const_cast<volatile T*>(ptr)));
+ for (; !platform_cmpxchg64_strong(expected, expected, const_cast<volatile T*>(ptr));)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
     return expected;
 }
 

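Two details in the gcc-x86.hpp hunks above are easy to miss: the seq_cst
fence instruction is now mfence whenever SSE2 is available (not only on
x86_64), and platform_fence_after_load no longer issues the hardware fence
for memory_order_seq_cst, only a compiler barrier; the full fence remains
in atomic_thread_fence. For reference, the selected fence behaves like the
following standalone helper (full_fence is an illustrative name, not a
Boost API):

    // Mirrors the BOOST_ATOMIC_X86_FENCE_INSTR selection above.
    inline void full_fence()
    {
    #if defined(__x86_64__) || defined(__SSE2__)
        __asm__ __volatile__ ("mfence\n" ::: "memory");
    #else
        // Pre-SSE2 x86: a locked RMW on the stack acts as a full fence.
        __asm__ __volatile__ ("lock ; addl $0, (%%esp)\n" ::: "memory");
    #endif
    }
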
Modified: branches/release/boost/atomic/detail/lockpool.hpp
==============================================================================
--- branches/release/boost/atomic/detail/lockpool.hpp (original)
+++ branches/release/boost/atomic/detail/lockpool.hpp 2012-12-21 15:14:10 EST (Fri, 21 Dec 2012)
@@ -69,8 +69,12 @@
         explicit
         scoped_lock(const volatile void * addr) : flag_(get_lock_for(addr))
         {
- do {
- } while (flag_.test_and_set(memory_order_acquire));
+ for (; flag_.test_and_set(memory_order_acquire);)
+ {
+#if defined(BOOST_ATOMIC_X86_PAUSE)
+ BOOST_ATOMIC_X86_PAUSE();
+#endif
+ }
         }
 
         ~scoped_lock(void)

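The scoped_lock change above is the classic test-and-set spinlock with a
spin-wait hint on contention; a minimal standalone equivalent, assuming
C++11 <atomic> (the spinlock class is illustrative, not the Boost lock
pool):

    #include <atomic>

    class spinlock
    {
        std::atomic_flag flag_;
    public:
        spinlock() { flag_.clear(); } // start unlocked
        void lock()
        {
            // Spin until the flag was previously clear; pause while contended.
            while (flag_.test_and_set(std::memory_order_acquire))
            {
    #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
                __asm__ __volatile__ ("pause\n");
    #endif
            }
        }
        void unlock()
        {
            flag_.clear(std::memory_order_release);
        }
    };
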
Modified: branches/release/boost/atomic/detail/windows.hpp
==============================================================================
--- branches/release/boost/atomic/detail/windows.hpp (original)
+++ branches/release/boost/atomic/detail/windows.hpp 2012-12-21 15:14:10 EST (Fri, 21 Dec 2012)
@@ -28,6 +28,14 @@
 namespace atomics {
 namespace detail {
 
+#if defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_IX86))
+extern "C" void _mm_pause(void);
+#pragma intrinsic(_mm_pause)
+#define BOOST_ATOMIC_X86_PAUSE() _mm_pause()
+#else
+#define BOOST_ATOMIC_X86_PAUSE()
+#endif
+
 // Define hardware barriers
 #if defined(_MSC_VER) && (defined(_M_AMD64) || (defined(_M_IX86) && defined(_M_IX86_FP) && _M_IX86_FP >= 2))
 extern "C" void _mm_mfence(void);
@@ -53,174 +61,40 @@
 
 #define BOOST_ATOMIC_READ_WRITE_BARRIER() _ReadWriteBarrier()
 
-#if _MSC_VER >= 1400
-
-extern "C" void _ReadBarrier();
-#pragma intrinsic(_ReadBarrier)
-extern "C" void _WriteBarrier();
-#pragma intrinsic(_WriteBarrier)
-
-#define BOOST_ATOMIC_READ_BARRIER() _ReadBarrier()
-#define BOOST_ATOMIC_WRITE_BARRIER() _WriteBarrier()
-
-#endif
 #endif
 
 #ifndef BOOST_ATOMIC_READ_WRITE_BARRIER
 #define BOOST_ATOMIC_READ_WRITE_BARRIER()
 #endif
-#ifndef BOOST_ATOMIC_READ_BARRIER
-#define BOOST_ATOMIC_READ_BARRIER() BOOST_ATOMIC_READ_WRITE_BARRIER()
-#endif
-#ifndef BOOST_ATOMIC_WRITE_BARRIER
-#define BOOST_ATOMIC_WRITE_BARRIER() BOOST_ATOMIC_READ_WRITE_BARRIER()
-#endif
-
-// MSVC (up to 2012, inclusively) optimizer generates a very poor code for switch-case in fence functions.
-// Issuing unconditional compiler barriers generates better code. We may re-enable the main branch if MSVC optimizer improves.
-#ifdef BOOST_MSVC
-#define BOOST_ATOMIC_DETAIL_BAD_SWITCH_CASE_OPTIMIZER
-#endif
 
 BOOST_FORCEINLINE void
-platform_fence_before(memory_order order)
+platform_fence_before(memory_order)
 {
-#ifdef BOOST_ATOMIC_DETAIL_BAD_SWITCH_CASE_OPTIMIZER
-
     BOOST_ATOMIC_READ_WRITE_BARRIER();
-
-#else
-
- switch(order)
- {
- case memory_order_relaxed:
- case memory_order_consume:
- case memory_order_acquire:
- break;
- case memory_order_release:
- case memory_order_acq_rel:
- BOOST_ATOMIC_WRITE_BARRIER();
- /* release */
- break;
- case memory_order_seq_cst:
- BOOST_ATOMIC_READ_WRITE_BARRIER();
- /* seq */
- break;
- }
-
-#endif
 }
 
 BOOST_FORCEINLINE void
-platform_fence_after(memory_order order)
+platform_fence_after(memory_order)
 {
-#ifdef BOOST_ATOMIC_DETAIL_BAD_SWITCH_CASE_OPTIMIZER
-
     BOOST_ATOMIC_READ_WRITE_BARRIER();
-
-#else
-
- switch(order)
- {
- case memory_order_relaxed:
- case memory_order_release:
- break;
- case memory_order_consume:
- case memory_order_acquire:
- case memory_order_acq_rel:
- BOOST_ATOMIC_READ_BARRIER();
- break;
- case memory_order_seq_cst:
- BOOST_ATOMIC_READ_WRITE_BARRIER();
- /* seq */
- break;
- }
-
-#endif
 }
 
 BOOST_FORCEINLINE void
-platform_fence_before_store(memory_order order)
+platform_fence_before_store(memory_order)
 {
-#ifdef BOOST_ATOMIC_DETAIL_BAD_SWITCH_CASE_OPTIMIZER
-
- BOOST_ATOMIC_WRITE_BARRIER();
-
-#else
-
- switch(order)
- {
- case memory_order_relaxed:
- case memory_order_acquire:
- case memory_order_consume:
- break;
- case memory_order_acq_rel:
- case memory_order_release:
- case memory_order_seq_cst:
- BOOST_ATOMIC_WRITE_BARRIER();
- break;
- }
-
-#endif
+ BOOST_ATOMIC_READ_WRITE_BARRIER();
 }
 
 BOOST_FORCEINLINE void
-platform_fence_after_store(memory_order order)
+platform_fence_after_store(memory_order)
 {
-#ifdef BOOST_ATOMIC_DETAIL_BAD_SWITCH_CASE_OPTIMIZER
-
- BOOST_ATOMIC_WRITE_BARRIER();
- if (order == memory_order_seq_cst)
- x86_full_fence();
-
-#else
-
- switch(order)
- {
- case memory_order_relaxed:
- case memory_order_acquire:
- case memory_order_consume:
- break;
- case memory_order_acq_rel:
- case memory_order_release:
- BOOST_ATOMIC_WRITE_BARRIER();
- break;
- case memory_order_seq_cst:
- x86_full_fence();
- break;
- }
-
-#endif
+ BOOST_ATOMIC_READ_WRITE_BARRIER();
 }
 
 BOOST_FORCEINLINE void
 platform_fence_after_load(memory_order order)
 {
-#ifdef BOOST_ATOMIC_DETAIL_BAD_SWITCH_CASE_OPTIMIZER
-
- BOOST_ATOMIC_READ_BARRIER();
- if (order == memory_order_seq_cst)
- x86_full_fence();
-
-#else
-
- switch(order)
- {
- case memory_order_relaxed:
- case memory_order_consume:
- break;
- case memory_order_acquire:
- case memory_order_acq_rel:
- BOOST_ATOMIC_READ_BARRIER();
- break;
- case memory_order_release:
- break;
- case memory_order_seq_cst:
- x86_full_fence();
- break;
- }
-
-#endif
+ BOOST_ATOMIC_READ_WRITE_BARRIER();
 }
 
 } // namespace detail
@@ -230,107 +104,45 @@
 BOOST_FORCEINLINE void
 atomic_thread_fence(memory_order order)
 {
-#ifdef BOOST_ATOMIC_DETAIL_BAD_SWITCH_CASE_OPTIMIZER
-
     BOOST_ATOMIC_READ_WRITE_BARRIER();
     if (order == memory_order_seq_cst)
         atomics::detail::x86_full_fence();
-
-#else
-
- switch (order)
- {
- case memory_order_relaxed:
- break;
- case memory_order_consume:
- case memory_order_acquire:
- BOOST_ATOMIC_READ_BARRIER();
- break;
- case memory_order_release:
- BOOST_ATOMIC_WRITE_BARRIER();
- break;
- case memory_order_acq_rel:
- BOOST_ATOMIC_READ_WRITE_BARRIER();
- break;
- case memory_order_seq_cst:
- atomics::detail::x86_full_fence();
- break;
- }
-
-#endif
 }
 
 #define BOOST_ATOMIC_SIGNAL_FENCE 2
 BOOST_FORCEINLINE void
-atomic_signal_fence(memory_order order)
+atomic_signal_fence(memory_order)
 {
-#ifdef BOOST_ATOMIC_DETAIL_BAD_SWITCH_CASE_OPTIMIZER
-
     BOOST_ATOMIC_READ_WRITE_BARRIER();
-
-#else
-
- switch (order)
- {
- case memory_order_relaxed:
- break;
- case memory_order_consume:
- case memory_order_acquire:
- BOOST_ATOMIC_READ_BARRIER();
- break;
- case memory_order_release:
- BOOST_ATOMIC_WRITE_BARRIER();
- break;
- case memory_order_acq_rel:
- case memory_order_seq_cst:
- BOOST_ATOMIC_READ_WRITE_BARRIER();
- break;
- }
-
-#endif
 }
 
 #undef BOOST_ATOMIC_READ_WRITE_BARRIER
-#undef BOOST_ATOMIC_READ_BARRIER
-#undef BOOST_ATOMIC_WRITE_BARRIER
 
 class atomic_flag
 {
 private:
     atomic_flag(const atomic_flag &) /* = delete */ ;
     atomic_flag & operator=(const atomic_flag &) /* = delete */ ;
-#ifdef BOOST_ATOMIC_INTERLOCKED_EXCHANGE8
- char v_;
-#else
- long v_;
-#endif
+ uint32_t v_;
 public:
     atomic_flag(void) : v_(0) {}
 
- void
- clear(memory_order order = memory_order_seq_cst) volatile
- {
- atomics::detail::platform_fence_before_store(order);
-#ifdef BOOST_ATOMIC_INTERLOCKED_EXCHANGE8
- BOOST_ATOMIC_INTERLOCKED_EXCHANGE8(&v_, 0);
-#else
- BOOST_ATOMIC_INTERLOCKED_EXCHANGE(&v_, 0);
-#endif
- atomics::detail::platform_fence_after_store(order);
- }
-
     bool
     test_and_set(memory_order order = memory_order_seq_cst) volatile
     {
         atomics::detail::platform_fence_before(order);
-#ifdef BOOST_ATOMIC_INTERLOCKED_EXCHANGE8
- const char old = BOOST_ATOMIC_INTERLOCKED_EXCHANGE8(&v_, 1);
-#else
- const long old = BOOST_ATOMIC_INTERLOCKED_EXCHANGE(&v_, 1);
-#endif
+ const uint32_t old = (uint32_t)BOOST_ATOMIC_INTERLOCKED_EXCHANGE(&v_, 1);
         atomics::detail::platform_fence_after(order);
         return old != 0;
     }
+
+ void
+ clear(memory_order order = memory_order_seq_cst) volatile
+ {
+ atomics::detail::platform_fence_before_store(order);
+ BOOST_ATOMIC_INTERLOCKED_EXCHANGE(&v_, 0);
+ atomics::detail::platform_fence_after_store(order);
+ }
 };
 
 } // namespace boost
@@ -478,7 +290,10 @@
         return v;
 #else
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
 #endif
     }
@@ -498,7 +313,10 @@
         return v;
 #else
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
 #endif
     }
@@ -518,7 +336,10 @@
         return v;
 #else
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
 #endif
     }
@@ -655,7 +476,10 @@
         return v;
 #else
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
 #endif
     }
@@ -675,7 +499,10 @@
         return v;
 #else
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
 #endif
     }
@@ -695,7 +522,10 @@
         return v;
 #else
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
 #endif
     }
@@ -807,7 +637,10 @@
         return v;
 #else
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
 #endif
     }
@@ -822,7 +655,10 @@
         return v;
 #else
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed));
+ for(; !compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
 #endif
     }
@@ -837,7 +673,10 @@
         return v;
 #else
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
 #endif
     }
@@ -951,7 +790,10 @@
         return v;
 #else
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp & v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
 #endif
     }
@@ -966,7 +808,10 @@
         return v;
 #else
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp | v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
 #endif
     }
@@ -981,7 +826,10 @@
         return v;
 #else
         value_type tmp = load(memory_order_relaxed);
- do {} while(!compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed));
+ for (; !compare_exchange_weak(tmp, tmp ^ v, order, memory_order_relaxed);)
+ {
+ BOOST_ATOMIC_X86_PAUSE();
+ }
         return tmp;
 #endif
     }

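On the MSVC side the pause hint comes from the _mm_pause intrinsic rather
than inline assembly (x64 MSVC has no inline asm), and the per-order
switch-based fences were collapsed into unconditional _ReadWriteBarrier()
calls because MSVC (up to 2012, inclusively) generates poor code for the
switch versions. The two halves of the commit combine into a spin-wait
hint like this sketch (spin_wait_hint is an illustrative name):

    #if defined(_MSC_VER)
    #include <intrin.h>   // declares _mm_pause on MSVC
    #endif

    inline void spin_wait_hint()
    {
    #if defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_IX86))
        _mm_pause();                      // intrinsic form used in windows.hpp
    #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
        __asm__ __volatile__ ("pause\n"); // inline-asm form used in gcc-x86.hpp
    #endif
    }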
