/* Generic version of spreadsort Copyright (c) 2002-2008 Steven J. Ross Some improvements suggested by: Phil Endecott and Frank Gennari Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef SPREADSORT_H #define SPREADSORT_H #include "constants.h" #include #include //This only works on unsigned data types template inline unsigned RoughLog2Size(const T& input) { unsigned result = 0; //The && is necessary on some compilers to avoid infinite loops; it doesn't significantly impair performance while((input >> result) && (result < (8*sizeof(T)))) ++result; return result; } //Gets the maximum size which we'll call spreadsort on to control worst-case performance //Maintains both a minimum size to recurse and a check of distribution size versus count //This is called for a set of bins, instead of bin-by-bin, to avoid performance overhead inline size_t GetMaxCount(unsigned log_range, size_t count) { unsigned divisor = RoughLog2Size(count); //Making sure the divisor is positive if(divisor > LOG_MEAN_BIN_SIZE) divisor -= LOG_MEAN_BIN_SIZE; else divisor = 1; unsigned relative_width = (LOG_CONST * log_range)/((divisor > MAX_SPLITS) ? MAX_SPLITS : divisor); //Don't try to bitshift more than the size of an element if((8*sizeof(size_t)) <= relative_width) relative_width = (8*sizeof(size_t)) - 1; return 1 << ((relative_width < (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT)) ? (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT) : relative_width); } //Find the minimum and maximum template inline void FindExtremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min) { min = max = current; //Start from the second item, as max and min are initialized to the first while(++current < last) { if(*max < *current) max = current; else if(*current < *min) min = current; } } template inline void FindExtremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min, Compare comp) { min = max = current; while(++current < last) { if(comp(*max, *current)) max = current; else if(comp(*current, *min)) min = current; } } //Implementation for recursive sorting template inline void SpreadSortRec(RandomAccessIter first, RandomAccessIter last, std::vector &bin_cache, size_t cache_offset , std::vector &bin_sizes) { size_t bin_count; unsigned log_range; //This step is roughly 10% of runtime, but it helps avoid worst-case behavior and improve behavior with real data //If you know the maximum and minimum ahead of time, you can pass those values in and skip this step for the first iteration RandomAccessIter max, min; FindExtremes(first, last, max, min); //max and min will be the same (the first item) iff all values are equivalent if(max == min) return; size_t u; int log_divisor; RandomAccessIter * target_bin; unsigned count = last - first; log_range = RoughLog2Size((size_t)(*max >> 0) - (*min >> 0)); //If we can finish in one iteration without exceeding either (2 to the MAX_SPLITS) or n bins, do so if((log_divisor = log_range - RoughLog2Size(count)) <= 0 && log_range < MAX_SPLITS) log_divisor = 0; else { //otherwise divide the data into an optimized number of pieces log_divisor += LOG_MEAN_BIN_SIZE; if(log_divisor < 0) log_divisor = 0; //Cannot exceed MAX_SPLITS or cache misses slow down bin lookups dramatically if((log_range - log_divisor) > MAX_SPLITS) log_divisor = log_range - MAX_SPLITS; } div_type div_min = *min >> log_divisor; div_type div_max = *max >> log_divisor; bin_count = div_max - div_min + 1; //Assure space for the size of each bin, followed by initializing sizes if(bin_count > bin_sizes.size()) bin_sizes.resize(bin_count); for(u = 0; u < bin_count; u++) bin_sizes[u] = 0; //Make sure there is space for the bins size_t cache_end = cache_offset + bin_count; if(cache_end > bin_cache.size()) bin_cache.resize(cache_end); RandomAccessIter *bins = &(bin_cache[cache_offset]); //Calculating the size of each bin; this takes roughly 10% of runtime for (RandomAccessIter current = first; current != last;) bin_sizes[(*(current++) >> log_divisor) - div_min]++; //Assign the bin positions bins[0] = first; for(u = 0; u < bin_count - 1; u++) bins[u + 1] = bins[u] + bin_sizes[u]; //Swap into place //This dominates runtime, mostly in the swap and bin lookups RandomAccessIter nextbinstart = first; for(unsigned ii = 0; ii < bin_count; ++ii) { RandomAccessIter * local_bin = bins + ii; nextbinstart += bin_sizes[ii]; //Iterating over each element in this bin for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { //Swapping elements in current into place until the correct element has been swapped in for(target_bin = (bins + ((*current >> log_divisor) - div_min)); target_bin != local_bin; target_bin = bins + ((*current >> log_divisor) - div_min)) { //3-way swap; this is about 1% faster than a 2-way swap with integers //The main advantage is less copies are involved per item put in the correct place data_type tmp; RandomAccessIter b = (*target_bin)++; RandomAccessIter * b_bin = bins + ((*b >> log_divisor) - div_min); if (b_bin != local_bin) { RandomAccessIter c = (*b_bin)++; tmp = *c; *c = *b; } else tmp = *b; *b = *current; *current = tmp; } } *local_bin = nextbinstart; } //If we've bucketsorted, the array is sorted and we should skip recursion if(!log_divisor) return; //Recursing; log_divisor is the remaining range size_t max_count = GetMaxCount(log_divisor, last - first); RandomAccessIter lastPos = first; for(size_t u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; //don't sort unless there are at least two items to compare if(count < 2) continue; //using std::sort if its worst-case is better if(count < max_count) std::sort(lastPos, bin_cache[u]); else SpreadSortRec(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes); } } //Functor implementation for recursive sorting template inline void SpreadSortRec(RandomAccessIter first, RandomAccessIter last, std::vector &bin_cache, size_t cache_offset , std::vector &bin_sizes, RightShift shift, Compare comp) { size_t bin_count; unsigned log_range; RandomAccessIter max, min; FindExtremes(first, last, max, min, comp); if(max == min) return; size_t u; int log_divisor; RandomAccessIter * target_bin; unsigned count = last - first; log_range = RoughLog2Size((size_t)(shift(*max, 0)) - (shift(*min, 0))); if((log_divisor = log_range - RoughLog2Size(count)) <= 0 && log_range < MAX_SPLITS) log_divisor = 0; else { log_divisor += LOG_MEAN_BIN_SIZE; if(log_divisor < 0) log_divisor = 0; if((log_range - log_divisor) > MAX_SPLITS) log_divisor = log_range - MAX_SPLITS; } div_type div_min = shift(*min, log_divisor); div_type div_max = shift(*max, log_divisor); bin_count = div_max - div_min + 1; //Assure space for the size of each bin, followed by initializing sizes if(bin_count > bin_sizes.size()) bin_sizes.resize(bin_count); for(u = 0; u < bin_count; u++) bin_sizes[u] = 0; size_t cache_end = cache_offset + bin_count; if(cache_end > bin_cache.size()) bin_cache.resize(cache_end); RandomAccessIter *bins = &(bin_cache[cache_offset]); //Calculating the size of each bin for (RandomAccessIter current = first; current != last;) bin_sizes[shift(*(current++), log_divisor) - div_min]++; bins[0] = first; for(u = 0; u < bin_count - 1; u++) bins[u + 1] = bins[u] + bin_sizes[u]; //Swap into place RandomAccessIter nextbinstart = first; for(unsigned ii = 0; ii < bin_count; ++ii) { RandomAccessIter * local_bin = bins + ii; nextbinstart += bin_sizes[ii]; for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { for(target_bin = (bins + (shift(*current, log_divisor) - div_min)); target_bin != local_bin; target_bin = bins + (shift(*current, log_divisor) - div_min)) { data_type tmp; RandomAccessIter b = (*target_bin)++; RandomAccessIter * b_bin = bins + (shift(*b, log_divisor) - div_min); if (b_bin != local_bin) { RandomAccessIter c = (*b_bin)++; tmp = *c; *c = *b; } else tmp = *b; *b = *current; *current = tmp; } } *local_bin = nextbinstart; } if(!log_divisor) return; size_t max_count = GetMaxCount(log_divisor, last - first); RandomAccessIter lastPos = first; for(size_t u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[u], comp); else SpreadSortRec(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift, comp); } } //Functor implementation for recursive sorting with only Shift overridden template inline void SpreadSortRec(RandomAccessIter first, RandomAccessIter last, std::vector &bin_cache, size_t cache_offset , std::vector &bin_sizes, RightShift shift) { size_t bin_count; unsigned log_range; RandomAccessIter max, min; FindExtremes(first, last, max, min); if(max == min) return; size_t u; int log_divisor; RandomAccessIter * target_bin; unsigned count = last - first; log_range = RoughLog2Size((size_t)(shift(*max, 0)) - (shift(*min, 0))); if((log_divisor = log_range - RoughLog2Size(count)) <= 0 && log_range < MAX_SPLITS) log_divisor = 0; else { log_divisor += LOG_MEAN_BIN_SIZE; if(log_divisor < 0) log_divisor = 0; if((log_range - log_divisor) > MAX_SPLITS) log_divisor = log_range - MAX_SPLITS; } div_type div_min = shift(*min, log_divisor); div_type div_max = shift(*max, log_divisor); bin_count = div_max - div_min + 1; //Assure space for the size of each bin, followed by initializing sizes if(bin_count > bin_sizes.size()) bin_sizes.resize(bin_count); for(u = 0; u < bin_count; u++) bin_sizes[u] = 0; size_t cache_end = cache_offset + bin_count; if(cache_end > bin_cache.size()) bin_cache.resize(cache_end); RandomAccessIter *bins = &(bin_cache[cache_offset]); //Calculating the size of each bin for (RandomAccessIter current = first; current != last;) bin_sizes[shift(*(current++), log_divisor) - div_min]++; bins[0] = first; for(u = 0; u < bin_count - 1; u++) bins[u + 1] = bins[u] + bin_sizes[u]; //Swap into place RandomAccessIter nextbinstart = first; for(unsigned ii = 0; ii < bin_count; ++ii) { RandomAccessIter * local_bin = bins + ii; nextbinstart += bin_sizes[ii]; for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { for(target_bin = (bins + (shift(*current, log_divisor) - div_min)); target_bin != local_bin; target_bin = bins + (shift(*current, log_divisor) - div_min)) { data_type tmp; RandomAccessIter b = (*target_bin)++; RandomAccessIter * b_bin = bins + (shift(*b, log_divisor) - div_min); if (b_bin != local_bin) { RandomAccessIter c = (*b_bin)++; tmp = *c; *c = *b; } else tmp = *b; *b = *current; *current = tmp; } } *local_bin = nextbinstart; } if(!log_divisor) return; size_t max_count = GetMaxCount(log_divisor, last - first); RandomAccessIter lastPos = first; for(size_t u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[u]); else SpreadSortRec(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift); } } //Holds the bin vector and makes the initial recursive call template inline void SpreadSort(RandomAccessIter first, RandomAccessIter last, div_type, data_type) { std::vector bin_sizes; std::vector bin_cache; SpreadSortRec(first, last, bin_cache, 0, bin_sizes); } template inline void SpreadSort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, RightShift shift, Compare comp) { std::vector bin_sizes; std::vector bin_cache; SpreadSortRec(first, last, bin_cache, 0, bin_sizes, shift, comp); } template inline void SpreadSort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, RightShift shift) { std::vector bin_sizes; std::vector bin_cache; SpreadSortRec(first, last, bin_cache, 0, bin_sizes, shift); } //Top-level sorting call template inline void integer_sort(RandomAccessIter first, RandomAccessIter last) { //Don't sort if it's too small to optimize if(last - first < MIN_SORT_SIZE) std::sort(first, last); else SpreadSort(first, last, *first >> 0, *first); } //integer_sort with functors template inline void integer_sort(RandomAccessIter first, RandomAccessIter last, RightShift shift, Compare comp) { if(last - first < MIN_SORT_SIZE) std::sort(first, last, comp); else SpreadSort(first, last, shift(*first, 0), *first, shift, comp); } //integer_sort with RightShift functor template inline void integer_sort(RandomAccessIter first, RandomAccessIter last, RightShift shift) { if(last - first < MIN_SORT_SIZE) std::sort(first, last); else SpreadSort(first, last, shift(*first, 0), *first, shift); } //------------------------------------------------------float_sort code template inline void FindExtremes(RandomAccessIter current, RandomAccessIter last, div_type & max, div_type & min, RightShift shift) { min = max = shift(*current, 0); while(++current < last) { div_type value = shift(*current, 0); if(max < value) max = value; else if(value < min) min = value; } } //Sorting Negative Floats //Note that bins are iterated in reverse order because max_neg_float = min_neg_int template inline void NegativeFloatSortRec(RandomAccessIter first, RandomAccessIter last, std::vector &bin_cache, size_t cache_offset , std::vector &bin_sizes, RightShift shift) { div_type max, min; FindExtremes(first, last, max, min, shift); if(max == min) return; size_t u; int log_divisor; RandomAccessIter * target_bin; unsigned count = last - first; unsigned log_range = RoughLog2Size((size_t)(max) - min); if((log_divisor = log_range - RoughLog2Size(count)) <= 0 && log_range < MAX_SPLITS) log_divisor = 0; else { log_divisor += LOG_MEAN_BIN_SIZE; if(log_divisor < 0) log_divisor = 0; if((log_range - log_divisor) > MAX_SPLITS) log_divisor = log_range - MAX_SPLITS; } div_type div_min = min >> log_divisor; div_type div_max = max >> log_divisor; size_t bin_count = div_max - div_min + 1; //Assure space for the size of each bin, followed by initializing sizes if(bin_count > bin_sizes.size()) bin_sizes.resize(bin_count); for(u = 0; u < bin_count; u++) bin_sizes[u] = 0; size_t cache_end = cache_offset + bin_count; if(cache_end > bin_cache.size()) bin_cache.resize(cache_end); RandomAccessIter *bins = &(bin_cache[cache_offset]); //Calculating the size of each bin for (RandomAccessIter current = first; current != last;) bin_sizes[shift(*(current++), log_divisor) - div_min]++; bins[bin_count - 1] = first; for(int ii = bin_count - 2; ii >= 0; --ii) bins[ii] = bins[ii + 1] + bin_sizes[ii + 1]; //Swap into place RandomAccessIter nextbinstart = first; //The last bin will always have the correct elements in it for(int ii = bin_count - 1; ii >= 0; --ii) { RandomAccessIter * local_bin = bins + ii; nextbinstart += bin_sizes[ii]; for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { for(target_bin = (bins + (shift(*current, log_divisor) - div_min)); target_bin != local_bin; target_bin = bins + (shift(*current, log_divisor) - div_min)) { data_type tmp; RandomAccessIter b = (*target_bin)++; RandomAccessIter * b_bin = bins + (shift(*b, log_divisor) - div_min); if (b_bin != local_bin) { RandomAccessIter c = (*b_bin)++; tmp = *c; *c = *b; } else tmp = *b; *b = *current; *current = tmp; } } *local_bin = nextbinstart; } //Since we don't process the last bin, we need to update its end position bin_cache[cache_offset] = last; if(!log_divisor) return; size_t max_count = GetMaxCount(log_divisor, last - first); RandomAccessIter lastPos = first; for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) { size_t count = bin_cache[ii] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[ii]); else NegativeFloatSortRec(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift); } } //Functor implementation for recursive sorting template inline void FloatSortRec(RandomAccessIter first, RandomAccessIter last, std::vector &bin_cache, size_t cache_offset , std::vector &bin_sizes, RightShift shift) { div_type max, min; FindExtremes(first, last, max, min, shift); if(max == min) return; size_t u; int log_divisor; RandomAccessIter * target_bin; unsigned count = last - first; unsigned log_range = RoughLog2Size((size_t)(max) - min); if((log_divisor = log_range - RoughLog2Size(count)) <= 0 && log_range < MAX_SPLITS) log_divisor = 0; else { log_divisor += LOG_MEAN_BIN_SIZE; if(log_divisor < 0) log_divisor = 0; if((log_range - log_divisor) > MAX_SPLITS) log_divisor = log_range - MAX_SPLITS; } div_type div_min = min >> log_divisor; div_type div_max = max >> log_divisor; size_t bin_count = div_max - div_min + 1; //Assure space for the size of each bin, followed by initializing sizes if(bin_count > bin_sizes.size()) bin_sizes.resize(bin_count); for(u = 0; u < bin_count; u++) bin_sizes[u] = 0; size_t cache_end = cache_offset + bin_count; if(cache_end > bin_cache.size()) bin_cache.resize(cache_end); RandomAccessIter *bins = &(bin_cache[cache_offset]); //Calculating the size of each bin for (RandomAccessIter current = first; current != last;) bin_sizes[shift(*(current++), log_divisor) - div_min]++; //The index of the first positive bin div_type first_positive = (div_min < 0) ? -div_min : 0; //Reversing the order of the negative bins //Note that because of the negative/positive ordering direction flip //We can not depend upon bin order and positions matching up //so bin_sizes must be reused to contain the end of the bin if(first_positive > 0) { bins[first_positive - 1] = first; for(int ii = first_positive - 2; ii >= 0; --ii) { bins[ii] = first + bin_sizes[ii + 1]; bin_sizes[ii] += bin_sizes[ii + 1]; } //Handling positives following negatives if((unsigned)first_positive < bin_count) { bins[first_positive] = first + bin_sizes[0]; bin_sizes[first_positive] += bin_sizes[0]; } } else bins[0] = first; for(u = first_positive; u < bin_count - 1; u++) { bins[u + 1] = first + bin_sizes[u]; bin_sizes[u + 1] += bin_sizes[u]; } //Swap into place RandomAccessIter nextbinstart = first; for(unsigned ii = 0; ii < bin_count; ++ii) { RandomAccessIter * local_bin = bins + ii; nextbinstart = first + bin_sizes[ii]; for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { for(target_bin = (bins + (shift(*current, log_divisor) - div_min)); target_bin != local_bin; target_bin = bins + (shift(*current, log_divisor) - div_min)) { data_type tmp; RandomAccessIter b = (*target_bin)++; RandomAccessIter * b_bin = bins + (shift(*b, log_divisor) - div_min); if (b_bin != local_bin) { RandomAccessIter c = (*b_bin)++; tmp = *c; *c = *b; } else tmp = *b; *b = *current; *current = tmp; } } *local_bin = nextbinstart; } if(!log_divisor) return; //Handling negative values first size_t max_count = GetMaxCount(log_divisor, last - first); RandomAccessIter lastPos = first; for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) { size_t count = bin_cache[ii] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[ii]); //sort negative values using reversed-bin Spreadsort else NegativeFloatSortRec(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift); } //Note: we don't bother sorting the last_bin, which is nans for(size_t u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[u]); //sort positive values using normal Spreadsort else SpreadSortRec(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift); } } template inline void FloatSort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, RightShift shift) { std::vector bin_sizes; std::vector bin_cache; FloatSortRec(first, last, bin_cache, 0, bin_sizes, shift); } //integer_sort with functors template inline void float_sort(RandomAccessIter first, RandomAccessIter last, RightShift shift) { if(last - first < MIN_SORT_SIZE) std::sort(first, last); else FloatSort(first, last, shift(*first, 0), *first, shift); } #endif