#define USE_VTUNE #define USE_BOOST //#define USE_STDLIB #include #include #include #include #ifdef USE_STDLIB #include #endif #include #include #ifdef USE_VTUNE #include "vtuneapi.h" #endif #ifdef USE_BOOST #include "boost/intrusive/ilist.hpp" #endif #include "boost/shared_array.hpp" #ifndef USE_VTUNE #define VTResume() #define VTPause() #endif #ifdef USE_BOOST struct my_tag; struct MyNode : public boost::intrusive::ilist_base_hook { MyNode(int n) : num(n) { } int num; bool operator<(const MyNode &rhs) const { return num < rhs.num; } }; typedef boost::intrusive::ilist::value_traits > list; #elif defined(USE_STDLIB) typedef int MyNode; typedef std::list list; #endif #define CACHEBLOCK 128 // prefetch block size #define PREFETCH_BLOCK_SIZE CACHEBLOCK * 64 #pragma optimize("", off) int fetch; void block_prefetch(const void *addr) { // Grab every 64th address to hit each cache line once. const int *a = (int *)addr; fetch += a[0] + a[16] + a[32] + a[48] + a[64] + a[80] + a[96] + a[112] + a[128] + a[144] + a[160] + a[176] + a[192] + a[208] + a[224] + a[240]; a += 256; fetch += a[0] + a[16] + a[32] + a[48] + a[64] + a[80] + a[96] + a[112] + a[128] + a[144] + a[160] + a[176] + a[192] + a[208] + a[224] + a[240]; a += 256; fetch += a[0] + a[16] + a[32] + a[48] + a[64] + a[80] + a[96] + a[112] + a[128] + a[144] + a[160] + a[176] + a[192] + a[208] + a[224] + a[240]; a += 256; fetch += a[0] + a[16] + a[32] + a[48] + a[64] + a[80] + a[96] + a[112] + a[128] + a[144] + a[160] + a[176] + a[192] + a[208] + a[224] + a[240]; } #pragma optimize("", on) int main(void) { const int NumTestNodes = 1000000; std::srand(static_cast(std::time(0))); std::vector my_nodes; std::generate_n(std::back_inserter(my_nodes), NumTestNodes, std::rand); std::vector > fragmented_memory; list my_list; for (int i = 0; i < NumTestNodes; ++i) { // Simulate fragmented memory boost::shared_array arr(new int[rand() % 500]); fragmented_memory.push_back(arr); my_list.push_back(my_nodes[i]); } // Clear the cache boost::shared_array dummy(new char[4096]); block_prefetch(dummy.get()); std::clock_t start_time = std::clock(); VTResume(); my_list.sort(); VTPause(); std::cout << "time for sorting: " << static_cast(std::clock() - start_time) / CLOCKS_PER_SEC << std::endl; my_list.clear(); }