Attached are some Windows CE patches for the thread rewrite code:
- boost\thread\win32\thread_primitives.hpp:
Windows CE does not supply the ANSI version of most Win32 functions 
(only the wide-char versions).  In addition, on WinCE certain functions 
like GetCurrentThread, GetCurrentProcess, SetEvent, ResetEvent, and a 
couple of others are inline functions declared in the kfuncs.h header file 
(rather than functions imported from the Win32 DLLs).  The patch 
implements workarounds for these issues.
- boost\thread\win32\thread_heap_alloc.hpp:
In DEBUG builds, Windows CE headers define a macro for HeapAlloc to 
remap it to a HeapAllocTrace function.  This wreaks havoc on the code to 
pull HeapAlloc into Boost.Thread's win32 namespace.  At this point, the 
best solution I could see was to undef this HeapAlloc macro if present.
- libs\thread\src\win32\thread.cpp:
Windows CE does not have a process.h header.  It also does not support 
the __declspec(thread) construct.  Finally, _beginthreadex is not part 
of the WinCE CRT, so an emulation of this function using ::CreateThread 
is included in the patch.
- libs\thread\src\win32\tss.cpp:
- libs\thread\src\win32\tss_hooks.cpp:
Windows CE does not define the TLS_OUT_OF_INDEXES constant.  The patch 
defines this constant to the right value for WinCE builds.
- libs\thread\src\tss_null.cpp:
- libs\thread\src\win32\tss_pe.cpp:
The Windows CE linker and loader do not seem to support all the fancy 
magic involved in registering the TSS callback handler with the 
static-link version of the library (DLL version works fine).  The patch 
reflects that automatic TSS cleanup is not available in the WinCE version of 
the static-link library.
Thanks,
-Dave
Index: thread_heap_alloc.hpp
===================================================================
--- thread_heap_alloc.hpp	(revision 40640)
+++ thread_heap_alloc.hpp	(working copy)
@@ -25,6 +25,10 @@
 
 #else
 
+#ifdef HeapAlloc
+#undef HeapAlloc
+#endif
+
 namespace boost
 {
     namespace detail
Index: thread_primitives.hpp
===================================================================
--- thread_primitives.hpp	(revision 40611)
+++ thread_primitives.hpp	(working copy)
@@ -28,9 +28,15 @@
             unsigned const timeout=WAIT_TIMEOUT;
             handle const invalid_handle_value=INVALID_HANDLE_VALUE;
 
+# ifdef BOOST_NO_ANSI_APIS
+            using ::CreateMutexW;
+            using ::CreateEventW;
+            using ::CreateSemaphoreW;
+# else
             using ::CreateMutexA;
             using ::CreateEventA;
             using ::CreateSemaphoreA;
+# endif
             using ::CloseHandle;
             using ::ReleaseMutex;
             using ::ReleaseSemaphore;
@@ -50,6 +56,29 @@
     }
 }
 #elif defined( WIN32 ) || defined( _WIN32 ) || defined( __WIN32__ )
+
+# ifdef UNDER_CE
+#  ifndef WINAPI
+#   ifndef _WIN32_WCE_EMULATION
+#    define WINAPI  __cdecl	// Note this doesn't match the desktop definition
+#   else
+#    define WINAPI  __stdcall
+#   endif
+#  endif
+
+#  ifdef __cplusplus
+extern "C" {
+#  endif
+typedef int BOOL;
+typedef unsigned long DWORD;
+typedef void* HANDLE;
+
+#  include <kfuncs.h>
+#  ifdef __cplusplus
+}
+#  endif
+# endif
+
 namespace boost
 {
     namespace detail
@@ -70,25 +99,42 @@
             extern "C"
             {
                 struct _SECURITY_ATTRIBUTES;
+# ifdef BOOST_NO_ANSI_APIS
+                __declspec(dllimport) void* __stdcall CreateMutexW(_SECURITY_ATTRIBUTES*,int,wchar_t const*);
+                __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES*,long,long,wchar_t const*);
+                __declspec(dllimport) void* __stdcall CreateEventW(_SECURITY_ATTRIBUTES*,int,int,wchar_t const*);
+# else
                 __declspec(dllimport) void* __stdcall CreateMutexA(_SECURITY_ATTRIBUTES*,int,char const*);
                 __declspec(dllimport) void* __stdcall CreateSemaphoreA(_SECURITY_ATTRIBUTES*,long,long,char const*);
                 __declspec(dllimport) void* __stdcall CreateEventA(_SECURITY_ATTRIBUTES*,int,int,char const*);
+# endif               
+
                 __declspec(dllimport) int __stdcall CloseHandle(void*);
                 __declspec(dllimport) int __stdcall ReleaseMutex(void*);
-                __declspec(dllimport) unsigned long __stdcall GetCurrentProcessId();
-                __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId();
                 __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void*,unsigned long);
+                __declspec(dllimport) unsigned long __stdcall WaitForMultipleObjects(unsigned long nCount,void* const * lpHandles,int bWaitAll,unsigned long dwMilliseconds);
                 __declspec(dllimport) int __stdcall ReleaseSemaphore(void*,long,long*);
-                __declspec(dllimport) void* __stdcall GetCurrentThread();
-                __declspec(dllimport) void* __stdcall GetCurrentProcess();
                 __declspec(dllimport) int __stdcall DuplicateHandle(void*,void*,void*,void**,unsigned long,int,unsigned long);
                 __declspec(dllimport) unsigned long __stdcall SleepEx(unsigned long,int);
                 __declspec(dllimport) void __stdcall Sleep(unsigned long);
                 typedef void (__stdcall *queue_user_apc_callback_function)(ulong_ptr);
                 __declspec(dllimport) unsigned long __stdcall QueueUserAPC(queue_user_apc_callback_function,void*,ulong_ptr);
+
+# ifndef UNDER_CE
+                __declspec(dllimport) unsigned long __stdcall GetCurrentProcessId();
+                __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId();
+                __declspec(dllimport) void* __stdcall GetCurrentThread();
+                __declspec(dllimport) void* __stdcall GetCurrentProcess();
                 __declspec(dllimport) int __stdcall SetEvent(void*);
                 __declspec(dllimport) int __stdcall ResetEvent(void*);
-                __declspec(dllimport) unsigned long __stdcall WaitForMultipleObjects(unsigned long nCount,void* const * lpHandles,int bWaitAll,unsigned long dwMilliseconds);
+# else
+                using ::GetCurrentProcessId;
+                using ::GetCurrentThreadId;
+                using ::GetCurrentThread;
+                using ::GetCurrentProcess;
+                using ::SetEvent;
+                using ::ResetEvent;
+# endif                
             }
         }
     }
@@ -117,7 +163,11 @@
             
             inline handle create_anonymous_event(event_type type,initial_event_state state)
             {
+#if !defined(BOOST_NO_ANSI_APIS)  
                 handle const res=win32::CreateEventA(0,type,state,0);
+#else
+                handle const res=win32::CreateEventW(0,type,state,0);
+#endif                
                 if(!res)
                 {
                     throw thread_resource_error();
@@ -127,7 +177,11 @@
 
             inline handle create_anonymous_semaphore(long initial_count,long max_count)
             {
+#if !defined(BOOST_NO_ANSI_APIS)  
                 handle const res=CreateSemaphoreA(NULL,initial_count,max_count,NULL);
+#else
+                handle const res=CreateSemaphoreW(NULL,initial_count,max_count,NULL);
+#endif               
                 if(!res)
                 {
                     throw thread_resource_error();
Index: tss.cpp
===================================================================
--- tss.cpp	(revision 40611)
+++ tss.cpp	(working copy)
@@ -20,6 +20,9 @@
 #if defined(BOOST_HAS_WINTHREADS)
 #   include <windows.h>
 #   include <boost/thread/detail/tss_hooks.hpp>
+#   if defined(UNDER_CE) && !defined(TLS_OUT_OF_INDEXES)
+#       define TLS_OUT_OF_INDEXES 0xFFFFFFFF
+#   endif
 #endif
 
 namespace {
Index: tss_hooks.cpp
===================================================================
--- tss_hooks.cpp	(revision 40611)
+++ tss_hooks.cpp	(working copy)
@@ -19,6 +19,10 @@
     #define WIN32_LEAN_AND_MEAN
     #include <windows.h>
 
+#   if defined(UNDER_CE) && !defined(TLS_OUT_OF_INDEXES)
+#       define TLS_OUT_OF_INDEXES 0xFFFFFFFF
+#   endif
+
     namespace
     {
         class CScopedCSLock
Index: tss_pe.cpp
===================================================================
--- tss_pe.cpp	(revision 40611)
+++ tss_pe.cpp	(working copy)
@@ -5,7 +5,7 @@
 
 #include <boost/thread/detail/config.hpp>
 
-#if defined(BOOST_HAS_WINTHREADS) && defined(BOOST_THREAD_BUILD_LIB) && defined(_MSC_VER)
+#if defined(BOOST_HAS_WINTHREADS) && defined(BOOST_THREAD_BUILD_LIB) && defined(_MSC_VER) && !defined(UNDER_CE)
 
     #include <boost/thread/detail/tss_hooks.hpp>
 
Index: thread.cpp
===================================================================
--- thread.cpp	(revision 40611)
+++ thread.cpp	(working copy)
@@ -6,7 +6,9 @@
 #include <boost/thread/thread.hpp>
 #include <algorithm>
 #include <windows.h>
+#ifndef UNDER_CE
 #include <process.h>
+#endif
 #include <stdio.h>
 #include <boost/thread/once.hpp>
 #include <boost/assert.hpp>
@@ -15,7 +17,7 @@
 {
     namespace
     {
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(UNDER_CE)
         __declspec(thread) detail::thread_data_base* current_thread_data=0;
         detail::thread_data_base* get_current_thread_data()
         {
@@ -59,6 +61,42 @@
             BOOST_ASSERT(res);
         }
 #endif
+
+
+#ifdef BOOST_NO_THREADEX
+// Windows CE doesn't define _beginthreadex
+
+        struct ThreadProxyData
+        {
+            typedef unsigned (__stdcall* func)(void*);
+            func start_address_;
+            void* arglist_;
+            ThreadProxyData(func start_address,void* arglist) : start_address_(start_address), arglist_(arglist) {}
+        };
+
+        DWORD WINAPI ThreadProxy(LPVOID args)
+        {
+            ThreadProxyData* data=reinterpret_cast<ThreadProxyData*>(args);
+            DWORD ret=data->start_address_(data->arglist_);
+            delete data;
+            return ret;
+        }
+        
+        typedef void* uintptr_t;
+
+        inline uintptr_t const _beginthreadex(void* security, unsigned stack_size, unsigned (__stdcall* start_address)(void*),
+            void* arglist, unsigned initflag, unsigned* thrdaddr)
+        {
+            DWORD threadID;
+            HANDLE hthread=CreateThread(static_cast<LPSECURITY_ATTRIBUTES>(security),stack_size,ThreadProxy,
+                new ThreadProxyData(start_address,arglist),initflag,&threadID);
+            if (hthread!=0)
+                *thrdaddr=threadID;
+            return reinterpret_cast<uintptr_t const>(hthread);
+        }
+
+#endif
+
     }
 
     void thread::yield()
@@ -170,7 +208,6 @@
             }
             ~externally_launched_thread()
             {
-                OutputDebugString("External thread finished\n");
             }
             
             void run()
Index: tss_null.cpp
===================================================================
--- tss_null.cpp	(revision 40640)
+++ tss_null.cpp	(working copy)
@@ -5,7 +5,7 @@
 
 #include <boost/thread/detail/config.hpp>
 
-#if defined(BOOST_HAS_WINTHREADS) && (defined(BOOST_THREAD_BUILD_LIB) || defined(BOOST_THREAD_TEST)) && !defined(_MSC_VER)
+#if defined(BOOST_HAS_WINTHREADS) && (defined(BOOST_THREAD_BUILD_LIB) || defined(BOOST_THREAD_TEST)) && !(defined(_MSC_VER) && !defined(UNDER_CE))
 
     /*
     This file is a "null" implementation of tss cleanup; it's