/* Generic version of spreadsort Copyright (c) 2002-2008 Steven J. Ross Some improvements suggested by: Phil Endecott and Frank Gennari Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef SPREADSORT_H #define SPREADSORT_H #include "constants.h" #include #include //Container for bin information. A size_t can be used for current_position, but that requires an extra + operation //and doesn't save any space if RandomAccessIter is a 32-bit pointer //Smaller Bins can be used (just a count member) if the swap loop is changed to iterate over bins then each item in the bin //but that doesn't optimize well on some compilers. template struct Bin{ size_t count; RandomAccessIter current_position; }; //This only works on unsigned data types template inline unsigned RoughLog2Size(const T& input) { unsigned result = 0; //The && is necessary on some compilers to avoid infinite loops; it doesn't significantly impair performance while((input >> result) && (result < (8*sizeof(T)))) result++; return result; } //Gets the maximum size which we'll call spreadsort on to control worst-case performance //Maintains both a minimum size to recurse and a check of distribution size versus count //This is called for a set of bins, instead of bin-by-bin, to avoid performance overhead inline size_t GetMaxCount(unsigned log_range, size_t count) { unsigned divisor = RoughLog2Size(count); //Making sure the divisor is positive if(divisor > LOG_MEAN_BIN_SIZE) divisor -= LOG_MEAN_BIN_SIZE; else divisor = 1; unsigned relative_width = (LOG_CONST * log_range)/((divisor > MAX_SPLITS) ? MAX_SPLITS : divisor); //Don't try to bitshift more than the size of an element if((8*sizeof(size_t)) <= relative_width) relative_width = (8*sizeof(size_t)) - 1; return 1 << ((relative_width < (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT)) ? (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT) : relative_width); } //Find the minimum and maximum template inline void FindExtremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min) { min = max = current; //Start from the second item, as max and min are initialized to the first while(++current < last) { if(*max < *current) max = current; else if(*current < *min) min = current; } } //Implementation for recursive sorting template inline void SpreadSortRec(RandomAccessIter first, RandomAccessIter last, std::vector > &bin_cache, size_t cache_offset) { size_t bin_count; unsigned log_range; //This step is roughly 10% of runtime, but it helps avoid worst-case behavior and improve behavior with real data //If you know the maximum and minimum ahead of time, you can pass those values in and skip this step for the first iteration RandomAccessIter max, min; FindExtremes(first, last, max, min); //max and min will be the same (the first item) iff all values are equivalent if(max == min) return; size_t u; int log_divisor; Bin * bins; Bin * current_bin; unsigned count = last - first; log_range = RoughLog2Size((size_t)(*max >> 0) - (*min >> 0)); if((log_divisor = log_range - RoughLog2Size(count)) <= 0) log_divisor = 0; else log_divisor += LOG_MEAN_BIN_SIZE; //The below if statement is only necessary on systems with high memory latency relative to their processor speed //Otherwise known as modern processors if((log_range - log_divisor) > MAX_SPLITS) log_divisor = log_range - MAX_SPLITS; div_type div_min = *min >> log_divisor; div_type div_max = *max >> log_divisor; bin_count = div_max - div_min + 1; //Make sure there is space for the bins size_t cache_end = cache_offset + bin_count; if(cache_end > bin_cache.size()) bin_cache.resize(cache_end); bins = &(bin_cache[cache_offset]); //Initializing the bins for(u = 0; u < bin_count; u++) bins[u].count = 0; //Calculating the size of each bin; this takes roughly 10% of runtime for (RandomAccessIter current = first; current != last;) bins[(*(current++) >> log_divisor) - div_min].count++; //Assign the bin positions //Note that the meaning of count changes to be the index of the first position bins[0].current_position = first; for(u = 0; u < bin_count - 1; u++) { bins[u + 1].current_position = bins[u].current_position + bins[u].count; bins[u].count = bins[u].current_position - first; } bins[u].count = bins[u].current_position - first; //Swap into place //This dominates runtime, especially in the swap; std::sort calls are the other main time-user for(unsigned ii = 0; ii < bin_count; ++ii) { Bin * local_bin = bins + ii; RandomAccessIter nextbin = (ii < (bin_count - 1)) ? first + bins[ii + 1].count : last; for(RandomAccessIter current = local_bin->current_position; current < nextbin; ++(local_bin->current_position), ++current) { for(current_bin = (bins + ((*current >> log_divisor) - div_min)); current_bin != local_bin; current_bin = bins + ((*current >> log_divisor) - div_min)) { data_type tmp; RandomAccessIter b = current_bin->current_position++; Bin * b_bin = bins + ((*b >> log_divisor) - div_min); if (b_bin != local_bin) { RandomAccessIter c = b_bin->current_position++; tmp = *c; *c = *b; } else tmp = *b; *b = *current; *current = tmp; } } } //If we've bucketsorted, the array is sorted and we should skip recursion if(!log_divisor) return; //Recursing; log_divisor is the remaining range size_t max_count = GetMaxCount(log_divisor, last - first); for(size_t u = cache_offset; u < cache_end; u++) { size_t count = (bin_cache[u].current_position - first) - bin_cache[u].count; //don't sort unless there are at least two items to compare if(count < 2) continue; //using std::sort if its worst-case is better if(count < max_count) std::sort(first + bin_cache[u].count, bin_cache[u].current_position); else SpreadSortRec(first + bin_cache[u].count, bin_cache[u].current_position, bin_cache, cache_end); } } //Holds the bin vector and makes the initial recursive call template inline void SpreadSort(RandomAccessIter first, RandomAccessIter last, div_type, data_type) { std::vector > bin_cache; SpreadSortRec(first, last, bin_cache, 0); } //Top-level sorting call template inline void integer_sort(RandomAccessIter first, RandomAccessIter last) { //Don't sort if it's too small to optimize if(last - first < MIN_SORT_SIZE) std::sort(first, last); else SpreadSort(first, last, *first >> 0, *first); } #endif