/* Templated spreadsort library Copyright (c) 2001-2008 Steven J. Ross Some improvements suggested by: Phil Endecott and Frank Gennari Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef SPREADSORT_H #define SPREADSORT_H #include "constants.h" #include #include //This only works on unsigned data types template inline unsigned RoughLog2Size(const T& input) { unsigned result = 0; //The && is necessary on some compilers to avoid infinite loops; it doesn't significantly impair performance while((input >> result) && (result < (8*sizeof(T)))) ++result; return result; } //Gets the maximum size which we'll call spreadsort on to control worst-case performance //Maintains both a minimum size to recurse and a check of distribution size versus count //This is called for a set of bins, instead of bin-by-bin, to avoid performance overhead inline size_t GetMaxCount(unsigned log_range, size_t count) { unsigned divisor = RoughLog2Size(count); //Making sure the divisor is positive if(divisor > LOG_MEAN_BIN_SIZE) divisor -= LOG_MEAN_BIN_SIZE; else divisor = 1; unsigned relative_width = (LOG_CONST * log_range)/((divisor > MAX_SPLITS) ? MAX_SPLITS : divisor); //Don't try to bitshift more than the size of an element if((8*sizeof(size_t)) <= relative_width) relative_width = (8*sizeof(size_t)) - 1; return 1 << ((relative_width < (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT)) ? (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT) : relative_width); } //Find the minimum and maximum using < template inline void FindExtremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min) { min = max = current; //Start from the second item, as max and min are initialized to the first while(++current < last) { if(*max < *current) max = current; else if(*current < *min) min = current; } } //Uses a user-defined comparison operator to find minimum and maximum template inline void FindExtremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min, Compare comp) { min = max = current; while(++current < last) { if(comp(*max, *current)) max = current; else if(comp(*current, *min)) min = current; } } //Gets a non-negative right bit shift to operate as a logarithmic divisor inline int GetLogDivisor(size_t count, unsigned log_range) { int log_divisor; //If we can finish in one iteration without exceeding either (2 to the MAX_SPLITS) or n bins, do so if((log_divisor = log_range - RoughLog2Size(count)) <= 0 && log_range < MAX_SPLITS) log_divisor = 0; else { //otherwise divide the data into an optimized number of pieces log_divisor += LOG_MEAN_BIN_SIZE; if(log_divisor < 0) log_divisor = 0; //Cannot exceed MAX_SPLITS or cache misses slow down bin lookups dramatically if((log_range - log_divisor) > MAX_SPLITS) log_divisor = log_range - MAX_SPLITS; } return log_divisor; } template inline RandomAccessIter * SizeBins(std::vector &bin_sizes, std::vector &bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count) { //Assure space for the size of each bin, followed by initializing sizes if(bin_count > bin_sizes.size()) bin_sizes.resize(bin_count); for(size_t u = 0; u < bin_count; u++) bin_sizes[u] = 0; //Make sure there is space for the bins cache_end = cache_offset + bin_count; if(cache_end > bin_cache.size()) bin_cache.resize(cache_end); return &(bin_cache[cache_offset]); } //Implementation for recursive integer sorting template inline void SpreadSortRec(RandomAccessIter first, RandomAccessIter last, std::vector &bin_cache, unsigned cache_offset , std::vector &bin_sizes) { //This step is roughly 10% of runtime, but it helps avoid worst-case behavior and improve behavior with real data //If you know the maximum and minimum ahead of time, you can pass those values in and skip this step for the first iteration RandomAccessIter max, min; FindExtremes(first, last, max, min); //max and min will be the same (the first item) iff all values are equivalent if(max == min) return; RandomAccessIter * target_bin; unsigned log_divisor = GetLogDivisor(last - first, RoughLog2Size((size_t)(*max >> 0) - (*min >> 0))); div_type div_min = *min >> log_divisor; div_type div_max = *max >> log_divisor; unsigned bin_count = div_max - div_min + 1; unsigned cache_end; RandomAccessIter * bins = SizeBins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count); //Calculating the size of each bin; this takes roughly 10% of runtime for (RandomAccessIter current = first; current != last;) bin_sizes[(*(current++) >> log_divisor) - div_min]++; //Assign the bin positions bins[0] = first; for(unsigned u = 0; u < bin_count - 1; u++) bins[u + 1] = bins[u] + bin_sizes[u]; //Swap into place //This dominates runtime, mostly in the swap and bin lookups RandomAccessIter nextbinstart = first; for(unsigned u = 0; u < bin_count - 1; ++u) { RandomAccessIter * local_bin = bins + u; nextbinstart += bin_sizes[u]; //Iterating over each element in this bin for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { //Swapping elements in current into place until the correct element has been swapped in for(target_bin = (bins + ((*current >> log_divisor) - div_min)); target_bin != local_bin; target_bin = bins + ((*current >> log_divisor) - div_min)) { //3-way swap; this is about 1% faster than a 2-way swap with integers //The main advantage is less copies are involved per item put in the correct place data_type tmp; RandomAccessIter b = (*target_bin)++; RandomAccessIter * b_bin = bins + ((*b >> log_divisor) - div_min); if (b_bin != local_bin) { RandomAccessIter c = (*b_bin)++; tmp = *c; *c = *b; } else tmp = *b; *b = *current; *current = tmp; } } *local_bin = nextbinstart; } bins[bin_count - 1] = last; //If we've bucketsorted, the array is sorted and we should skip recursion if(!log_divisor) return; //Recursing; log_divisor is the remaining range size_t max_count = GetMaxCount(log_divisor, last - first); RandomAccessIter lastPos = first; for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; //don't sort unless there are at least two items to compare if(count < 2) continue; //using std::sort if its worst-case is better if(count < max_count) std::sort(lastPos, bin_cache[u]); else SpreadSortRec(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes); } } //Generic bitshift-based 3-way swapping code template inline void InnerSwapLoop(RandomAccessIter * bins, const RandomAccessIter & nextbinstart, unsigned ii, RightShift &shift , const unsigned log_divisor, const div_type div_min) { RandomAccessIter * local_bin = bins + ii; for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { for(RandomAccessIter * target_bin = (bins + (shift(*current, log_divisor) - div_min)); target_bin != local_bin; target_bin = bins + (shift(*current, log_divisor) - div_min)) { data_type tmp; RandomAccessIter b = (*target_bin)++; RandomAccessIter * b_bin = bins + (shift(*b, log_divisor) - div_min); //Three-way swap; if the item to be swapped doesn't belong in the current bin, swap it to where it belongs if (b_bin != local_bin) { RandomAccessIter c = (*b_bin)++; tmp = *c; *c = *b; } //Note: we could increment current once the swap is done in this case, but that seems to impair performance else tmp = *b; *b = *current; *current = tmp; } } *local_bin = nextbinstart; } //Standard swapping wrapper for ascending values template inline void SwapLoop(RandomAccessIter * bins, RandomAccessIter & nextbinstart, unsigned ii, RightShift &shift , const std::vector &bin_sizes, const unsigned log_divisor, const div_type div_min) { nextbinstart += bin_sizes[ii]; InnerSwapLoop(bins, nextbinstart, ii, shift, log_divisor, div_min); } //Functor implementation for recursive sorting template inline void SpreadSortRec(RandomAccessIter first, RandomAccessIter last, std::vector &bin_cache, unsigned cache_offset , std::vector &bin_sizes, RightShift shift, Compare comp) { RandomAccessIter max, min; FindExtremes(first, last, max, min, comp); if(max == min) return; unsigned log_divisor = GetLogDivisor(last - first, RoughLog2Size((size_t)(shift(*max, 0)) - (shift(*min, 0)))); div_type div_min = shift(*min, log_divisor); div_type div_max = shift(*max, log_divisor); unsigned bin_count = div_max - div_min + 1; unsigned cache_end; RandomAccessIter * bins = SizeBins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count); //Calculating the size of each bin for (RandomAccessIter current = first; current != last;) bin_sizes[shift(*(current++), log_divisor) - div_min]++; bins[0] = first; for(unsigned u = 0; u < bin_count - 1; u++) bins[u + 1] = bins[u] + bin_sizes[u]; //Swap into place RandomAccessIter nextbinstart = first; for(unsigned u = 0; u < bin_count - 1; ++u) SwapLoop(bins, nextbinstart, u, shift, bin_sizes, log_divisor, div_min); bins[bin_count - 1] = last; //If we've bucketsorted, the array is sorted and we should skip recursion if(!log_divisor) return; //Recursing size_t max_count = GetMaxCount(log_divisor, last - first); RandomAccessIter lastPos = first; for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[u], comp); else SpreadSortRec(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift, comp); } } //Functor implementation for recursive sorting with only Shift overridden template inline void SpreadSortRec(RandomAccessIter first, RandomAccessIter last, std::vector &bin_cache, unsigned cache_offset , std::vector &bin_sizes, RightShift shift) { RandomAccessIter max, min; FindExtremes(first, last, max, min); if(max == min) return; unsigned log_divisor = GetLogDivisor(last - first, RoughLog2Size((size_t)(shift(*max, 0)) - (shift(*min, 0)))); div_type div_min = shift(*min, log_divisor); div_type div_max = shift(*max, log_divisor); unsigned bin_count = div_max - div_min + 1; unsigned cache_end; RandomAccessIter * bins = SizeBins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count); //Calculating the size of each bin for (RandomAccessIter current = first; current != last;) bin_sizes[shift(*(current++), log_divisor) - div_min]++; bins[0] = first; for(unsigned u = 0; u < bin_count - 1; u++) bins[u + 1] = bins[u] + bin_sizes[u]; //Swap into place RandomAccessIter nextbinstart = first; for(unsigned ii = 0; ii < bin_count - 1; ++ii) SwapLoop(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min); bins[bin_count - 1] = last; //If we've bucketsorted, the array is sorted and we should skip recursion if(!log_divisor) return; //Recursing size_t max_count = GetMaxCount(log_divisor, last - first); RandomAccessIter lastPos = first; for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[u]); else SpreadSortRec(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift); } } //Holds the bin vector and makes the initial recursive call template inline void SpreadSort(RandomAccessIter first, RandomAccessIter last, div_type, data_type) { std::vector bin_sizes; std::vector bin_cache; SpreadSortRec(first, last, bin_cache, 0, bin_sizes); } template inline void SpreadSort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, RightShift shift, Compare comp) { std::vector bin_sizes; std::vector bin_cache; SpreadSortRec(first, last, bin_cache, 0, bin_sizes, shift, comp); } template inline void SpreadSort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, RightShift shift) { std::vector bin_sizes; std::vector bin_cache; SpreadSortRec(first, last, bin_cache, 0, bin_sizes, shift); } //Top-level sorting call for integers template inline void integer_sort(RandomAccessIter first, RandomAccessIter last) { //Don't sort if it's too small to optimize if(last - first < MIN_SORT_SIZE) std::sort(first, last); else SpreadSort(first, last, *first >> 0, *first); } //integer_sort with functors template inline void integer_sort(RandomAccessIter first, RandomAccessIter last, RightShift shift, Compare comp) { if(last - first < MIN_SORT_SIZE) std::sort(first, last, comp); else SpreadSort(first, last, shift(*first, 0), *first, shift, comp); } //integer_sort with RightShift functor template inline void integer_sort(RandomAccessIter first, RandomAccessIter last, RightShift shift) { if(last - first < MIN_SORT_SIZE) std::sort(first, last); else SpreadSort(first, last, shift(*first, 0), *first, shift); } //------------------------------------------------------ float_sort source -------------------------------------- template inline void FindExtremes(RandomAccessIter current, RandomAccessIter last, div_type & max, div_type & min, RightShift shift) { min = max = shift(*current, 0); while(++current < last) { div_type value = shift(*current, 0); if(max < value) max = value; else if(value < min) min = value; } } //Casts a RandomAccessIter to the specified data type template inline cast_type CastFloatIter(const RandomAccessIter & floatiter) { return *((cast_type *)(&(*floatiter))); } //Specialized swap loops for floating-point casting template inline void InnerFloatSwapLoop(RandomAccessIter * bins, const RandomAccessIter & nextbinstart, unsigned ii , const unsigned log_divisor, const div_type div_min) { RandomAccessIter * local_bin = bins + ii; for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { for(RandomAccessIter * target_bin = (bins + ((CastFloatIter(current) >> log_divisor) - div_min)); target_bin != local_bin; target_bin = bins + ((CastFloatIter(current) >> log_divisor) - div_min)) { data_type tmp; RandomAccessIter b = (*target_bin)++; RandomAccessIter * b_bin = bins + ((CastFloatIter(b) >> log_divisor) - div_min); //Three-way swap; if the item to be swapped doesn't belong in the current bin, swap it to where it belongs if (b_bin != local_bin) { RandomAccessIter c = (*b_bin)++; tmp = *c; *c = *b; } else tmp = *b; *b = *current; *current = tmp; } } *local_bin = nextbinstart; } template inline void FloatSwapLoop(RandomAccessIter * bins, RandomAccessIter & nextbinstart, unsigned ii , const std::vector &bin_sizes, const unsigned log_divisor, const div_type div_min) { nextbinstart += bin_sizes[ii]; InnerFloatSwapLoop(bins, nextbinstart, ii, log_divisor, div_min); } template inline void FindExtremes(RandomAccessIter current, RandomAccessIter last, cast_type & max, cast_type & min) { min = max = CastFloatIter(current); while(++current < last) { cast_type value = CastFloatIter(current); if(max < value) max = value; else if(value < min) min = value; } } //Special-case sorting of positive floats with casting instead of a RightShift template inline void PositiveFloatSortRec(RandomAccessIter first, RandomAccessIter last, std::vector &bin_cache, unsigned cache_offset , std::vector &bin_sizes) { div_type max, min; FindExtremes(first, last, max, min); if(max == min) return; unsigned log_divisor = GetLogDivisor(last - first, RoughLog2Size((size_t)(max) - min)); div_type div_min = min >> log_divisor; div_type div_max = max >> log_divisor; unsigned bin_count = div_max - div_min + 1; unsigned cache_end; RandomAccessIter * bins = SizeBins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count); //Calculating the size of each bin for (RandomAccessIter current = first; current != last;) bin_sizes[(CastFloatIter(current++) >> log_divisor) - div_min]++; bins[0] = first; for(unsigned u = 0; u < bin_count - 1; u++) bins[u + 1] = bins[u] + bin_sizes[u]; //Swap into place RandomAccessIter nextbinstart = first; for(unsigned u = 0; u < bin_count - 1; ++u) FloatSwapLoop(bins, nextbinstart, u, bin_sizes, log_divisor, div_min); bins[bin_count - 1] = last; //Return if we've completed bucketsorting if(!log_divisor) return; //Recursing size_t max_count = GetMaxCount(log_divisor, last - first); RandomAccessIter lastPos = first; for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[u]); else PositiveFloatSortRec(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes); } } //Sorting Negative Floats //Note that bins are iterated in reverse order because max_neg_float = min_neg_int template inline void NegativeFloatSortRec(RandomAccessIter first, RandomAccessIter last, std::vector &bin_cache, unsigned cache_offset , std::vector &bin_sizes) { div_type max, min; FindExtremes(first, last, max, min); if(max == min) return; unsigned log_divisor = GetLogDivisor(last - first, RoughLog2Size((size_t)(max) - min)); div_type div_min = min >> log_divisor; div_type div_max = max >> log_divisor; unsigned bin_count = div_max - div_min + 1; unsigned cache_end; RandomAccessIter * bins = SizeBins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count); //Calculating the size of each bin for (RandomAccessIter current = first; current != last;) bin_sizes[(CastFloatIter(current++) >> log_divisor) - div_min]++; bins[bin_count - 1] = first; for(int ii = bin_count - 2; ii >= 0; --ii) bins[ii] = bins[ii + 1] + bin_sizes[ii + 1]; //Swap into place RandomAccessIter nextbinstart = first; //The last bin will always have the correct elements in it for(int ii = bin_count - 1; ii > 0; --ii) FloatSwapLoop(bins, nextbinstart, ii, bin_sizes, log_divisor, div_min); //Since we don't process the last bin, we need to update its end position bin_cache[cache_offset] = last; //Return if we've completed bucketsorting if(!log_divisor) return; //Recursing size_t max_count = GetMaxCount(log_divisor, last - first); RandomAccessIter lastPos = first; for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) { size_t count = bin_cache[ii] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[ii]); else NegativeFloatSortRec(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes); } } //Sorting Negative Floats //Note that bins are iterated in reverse order because max_neg_float = min_neg_int template inline void NegativeFloatSortRec(RandomAccessIter first, RandomAccessIter last, std::vector &bin_cache, unsigned cache_offset , std::vector &bin_sizes, RightShift shift) { div_type max, min; FindExtremes(first, last, max, min, shift); if(max == min) return; unsigned log_divisor = GetLogDivisor(last - first, RoughLog2Size((size_t)(max) - min)); div_type div_min = min >> log_divisor; div_type div_max = max >> log_divisor; unsigned bin_count = div_max - div_min + 1; unsigned cache_end; RandomAccessIter * bins = SizeBins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count); //Calculating the size of each bin for (RandomAccessIter current = first; current != last;) bin_sizes[shift(*(current++), log_divisor) - div_min]++; bins[bin_count - 1] = first; for(int ii = bin_count - 2; ii >= 0; --ii) bins[ii] = bins[ii + 1] + bin_sizes[ii + 1]; //Swap into place RandomAccessIter nextbinstart = first; //The last bin will always have the correct elements in it for(int ii = bin_count - 1; ii > 0; --ii) SwapLoop(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min); //Since we don't process the last bin, we need to update its end position bin_cache[cache_offset] = last; //Return if we've completed bucketsorting if(!log_divisor) return; //Recursing size_t max_count = GetMaxCount(log_divisor, last - first); RandomAccessIter lastPos = first; for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) { size_t count = bin_cache[ii] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[ii]); else NegativeFloatSortRec(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift); } } template inline void NegativeFloatSortRec(RandomAccessIter first, RandomAccessIter last, std::vector &bin_cache, unsigned cache_offset , std::vector &bin_sizes, RightShift shift, Compare comp) { div_type max, min; FindExtremes(first, last, max, min, shift); if(max == min) return; unsigned log_divisor = GetLogDivisor(last - first, RoughLog2Size((size_t)(max) - min)); div_type div_min = min >> log_divisor; div_type div_max = max >> log_divisor; unsigned bin_count = div_max - div_min + 1; unsigned cache_end; RandomAccessIter * bins = SizeBins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count); //Calculating the size of each bin for (RandomAccessIter current = first; current != last;) bin_sizes[shift(*(current++), log_divisor) - div_min]++; bins[bin_count - 1] = first; for(int ii = bin_count - 2; ii >= 0; --ii) bins[ii] = bins[ii + 1] + bin_sizes[ii + 1]; //Swap into place RandomAccessIter nextbinstart = first; //The last bin will always have the correct elements in it for(int ii = bin_count - 1; ii > 0; --ii) SwapLoop(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min); //Since we don't process the last bin, we need to update its end position bin_cache[cache_offset] = last; //Return if we've completed bucketsorting if(!log_divisor) return; //Recursing size_t max_count = GetMaxCount(log_divisor, last - first); RandomAccessIter lastPos = first; for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) { size_t count = bin_cache[ii] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[ii], comp); else NegativeFloatSortRec(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift, comp); } } //Casting special-case for floating-point sorting template inline void FloatSortRec(RandomAccessIter first, RandomAccessIter last, std::vector &bin_cache, unsigned cache_offset , std::vector &bin_sizes) { div_type max, min; FindExtremes(first, last, max, min); if(max == min) return; unsigned log_divisor = GetLogDivisor(last - first, RoughLog2Size((size_t)(max) - min)); div_type div_min = min >> log_divisor; div_type div_max = max >> log_divisor; unsigned bin_count = div_max - div_min + 1; unsigned cache_end; RandomAccessIter * bins = SizeBins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count); //Calculating the size of each bin for (RandomAccessIter current = first; current != last;) bin_sizes[(CastFloatIter(current++) >> log_divisor) - div_min]++; //The index of the first positive bin div_type first_positive = (div_min < 0) ? -div_min : 0; //Resetting if all bins are negative if(cache_offset + first_positive > cache_end) first_positive = cache_end - cache_offset; //Reversing the order of the negative bins //Note that because of the negative/positive ordering direction flip //We can not depend upon bin order and positions matching up //so bin_sizes must be reused to contain the end of the bin if(first_positive > 0) { bins[first_positive - 1] = first; for(int ii = first_positive - 2; ii >= 0; --ii) { bins[ii] = first + bin_sizes[ii + 1]; bin_sizes[ii] += bin_sizes[ii + 1]; } //Handling positives following negatives if((unsigned)first_positive < bin_count) { bins[first_positive] = first + bin_sizes[0]; bin_sizes[first_positive] += bin_sizes[0]; } } else bins[0] = first; for(unsigned u = first_positive; u < bin_count - 1; u++) { bins[u + 1] = first + bin_sizes[u]; bin_sizes[u + 1] += bin_sizes[u]; } //Swap into place RandomAccessIter nextbinstart = first; for(unsigned u = 0; u < bin_count; ++u) { nextbinstart = first + bin_sizes[u]; InnerFloatSwapLoop(bins, nextbinstart, u, log_divisor, div_min); } if(!log_divisor) return; //Handling negative values first size_t max_count = GetMaxCount(log_divisor, last - first); RandomAccessIter lastPos = first; for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) { size_t count = bin_cache[ii] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[ii]); //sort negative values using reversed-bin Spreadsort else NegativeFloatSortRec(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes); } for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[u]); //sort positive values using normal Spreadsort else PositiveFloatSortRec(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes); } } //Functor implementation for recursive sorting template inline void FloatSortRec(RandomAccessIter first, RandomAccessIter last, std::vector &bin_cache, unsigned cache_offset , std::vector &bin_sizes, RightShift shift) { div_type max, min; FindExtremes(first, last, max, min, shift); if(max == min) return; unsigned log_divisor = GetLogDivisor(last - first, RoughLog2Size((size_t)(max) - min)); div_type div_min = min >> log_divisor; div_type div_max = max >> log_divisor; unsigned bin_count = div_max - div_min + 1; unsigned cache_end; RandomAccessIter * bins = SizeBins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count); //Calculating the size of each bin for (RandomAccessIter current = first; current != last;) bin_sizes[shift(*(current++), log_divisor) - div_min]++; //The index of the first positive bin div_type first_positive = (div_min < 0) ? -div_min : 0; //Resetting if all bins are negative if(cache_offset + first_positive > cache_end) first_positive = cache_end - cache_offset; //Reversing the order of the negative bins //Note that because of the negative/positive ordering direction flip //We can not depend upon bin order and positions matching up //so bin_sizes must be reused to contain the end of the bin if(first_positive > 0) { bins[first_positive - 1] = first; for(int ii = first_positive - 2; ii >= 0; --ii) { bins[ii] = first + bin_sizes[ii + 1]; bin_sizes[ii] += bin_sizes[ii + 1]; } //Handling positives following negatives if((unsigned)first_positive < bin_count) { bins[first_positive] = first + bin_sizes[0]; bin_sizes[first_positive] += bin_sizes[0]; } } else bins[0] = first; for(unsigned u = first_positive; u < bin_count - 1; u++) { bins[u + 1] = first + bin_sizes[u]; bin_sizes[u + 1] += bin_sizes[u]; } //Swap into place RandomAccessIter nextbinstart = first; for(unsigned u = 0; u < bin_count; ++u) { nextbinstart = first + bin_sizes[u]; InnerSwapLoop(bins, nextbinstart, u, shift, log_divisor, div_min); } //Return if we've completed bucketsorting if(!log_divisor) return; //Handling negative values first size_t max_count = GetMaxCount(log_divisor, last - first); RandomAccessIter lastPos = first; for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) { size_t count = bin_cache[ii] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[ii]); //sort negative values using reversed-bin Spreadsort else NegativeFloatSortRec(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift); } for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[u]); //sort positive values using normal Spreadsort else SpreadSortRec(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift); } } template inline void FloatSortRec(RandomAccessIter first, RandomAccessIter last, std::vector &bin_cache, unsigned cache_offset , std::vector &bin_sizes, RightShift shift, Compare comp) { div_type max, min; FindExtremes(first, last, max, min, shift); if(max == min) return; unsigned log_divisor = GetLogDivisor(last - first, RoughLog2Size((size_t)(max) - min)); div_type div_min = min >> log_divisor; div_type div_max = max >> log_divisor; unsigned bin_count = div_max - div_min + 1; unsigned cache_end; RandomAccessIter * bins = SizeBins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count); //Calculating the size of each bin for (RandomAccessIter current = first; current != last;) bin_sizes[shift(*(current++), log_divisor) - div_min]++; //The index of the first positive bin div_type first_positive = (div_min < 0) ? -div_min : 0; //Resetting if all bins are negative if(cache_offset + first_positive > cache_end) first_positive = cache_end - cache_offset; //Reversing the order of the negative bins //Note that because of the negative/positive ordering direction flip //We can not depend upon bin order and positions matching up //so bin_sizes must be reused to contain the end of the bin if(first_positive > 0) { bins[first_positive - 1] = first; for(int ii = first_positive - 2; ii >= 0; --ii) { bins[ii] = first + bin_sizes[ii + 1]; bin_sizes[ii] += bin_sizes[ii + 1]; } //Handling positives following negatives if((unsigned)first_positive < bin_count) { bins[first_positive] = first + bin_sizes[0]; bin_sizes[first_positive] += bin_sizes[0]; } } else bins[0] = first; for(unsigned u = first_positive; u < bin_count - 1; u++) { bins[u + 1] = first + bin_sizes[u]; bin_sizes[u + 1] += bin_sizes[u]; } //Swap into place RandomAccessIter nextbinstart = first; for(unsigned u = 0; u < bin_count; ++u) { nextbinstart = first + bin_sizes[u]; InnerSwapLoop(bins, nextbinstart, u, shift, log_divisor, div_min); } //Return if we've completed bucketsorting if(!log_divisor) return; //Handling negative values first size_t max_count = GetMaxCount(log_divisor, last - first); RandomAccessIter lastPos = first; for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) { size_t count = bin_cache[ii] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[ii]); //sort negative values using reversed-bin Spreadsort else NegativeFloatSortRec(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift, comp); } for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; if(count < 2) continue; if(count < max_count) std::sort(lastPos, bin_cache[u]); //sort positive values using normal Spreadsort else SpreadSortRec(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift, comp); } } template inline void FloatSort(RandomAccessIter first, RandomAccessIter last, cast_type, data_type) { std::vector bin_sizes; std::vector bin_cache; FloatSortRec(first, last, bin_cache, 0, bin_sizes); } template inline void FloatSort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, RightShift shift) { std::vector bin_sizes; std::vector bin_cache; FloatSortRec(first, last, bin_cache, 0, bin_sizes, shift); } template inline void FloatSort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, RightShift shift, Compare comp) { std::vector bin_sizes; std::vector bin_cache; FloatSortRec(first, last, bin_cache, 0, bin_sizes, shift, comp); } //float_sort with casting //The cast_type must be equal in size to the data type, and must be a signed integer template inline void float_sort_cast(RandomAccessIter first, RandomAccessIter last, cast_type cVal) { //CYGWIN gcc has an optimization bug with -O2 and -O3 options that causes the InnerSwapLoop increment operation to be executed out of order. //This optimization bug causes a crash, and there is no clear way to work around it while maintaining algorithmic performance. //For that reason, float_sort just calls std::sort on CYGWIN #ifdef __CYGWIN__ std::sort(first, last); #else if(last - first < MIN_SORT_SIZE) std::sort(first, last); else FloatSort(first, last, cVal, *first); #endif } //float_sort with casting to an int //Only use this with IEEE floating-point numbers template inline void float_sort_cast_to_int(RandomAccessIter first, RandomAccessIter last) { int cVal = 0; float_sort_cast(first, last, cVal); } //float_sort with functors template inline void float_sort(RandomAccessIter first, RandomAccessIter last, RightShift shift) { //CYGWIN gcc has an optimization bug with -O2 and -O3 options that causes the InnerSwapLoop increment operation to be executed out of order. //This optimization bug causes a crash, and there is no clear way to work around it while maintaining algorithmic performance. //For that reason, float_sort just calls std::sort on CYGWIN #ifdef __CYGWIN__ std::sort(first, last); #else if(last - first < MIN_SORT_SIZE) std::sort(first, last); else FloatSort(first, last, shift(*first, 0), *first, shift); #endif } template inline void float_sort(RandomAccessIter first, RandomAccessIter last, RightShift shift, Compare comp) { //CYGWIN gcc has an optimization bug with -O2 and -O3 options that causes the InnerSwapLoop increment operation to be executed out of order. //This optimization bug causes a crash, and there is no clear way to work around it while maintaining algorithmic performance. //For that reason, float_sort just calls std::sort on CYGWIN #ifdef __CYGWIN__ std::sort(first, last, comp); #else if(last - first < MIN_SORT_SIZE) std::sort(first, last, comp); else FloatSort(first, last, shift(*first, 0), *first, shift, comp); #endif } //------------------------------------------------- string_sort source --------------------------------------------- //Offsetting on identical characters. This function works a character at a time for optimal worst-case performance. template inline void UpdateOffset(RandomAccessIter first, RandomAccessIter finish, unsigned &charOffset) { unsigned nextOffset = charOffset; bool done = false; while(!done) { RandomAccessIter curr = first; do { //ignore empties, but if the nextOffset would exceed the length or not match, exit; we've found the last matching character if((*curr).size() > charOffset && ((*curr).size() <= (nextOffset + 1) || (*curr)[nextOffset] != (*first)[nextOffset])) { done = true; break; } } while(++curr != finish); if(!done) ++nextOffset; } charOffset = nextOffset; } //Offsetting on identical characters. This function works a character at a time for optimal worst-case performance. template inline void UpdateOffset(RandomAccessIter first, RandomAccessIter finish, unsigned &charOffset, GetChar getchar, GetLength length) { unsigned nextOffset = charOffset; bool done = false; while(!done) { RandomAccessIter curr = first; do { //ignore empties, but if the nextOffset would exceed the length or not match, exit; we've found the last matching character if(length(*curr) > charOffset && (length(*curr) <= (nextOffset + 1) || getchar((*curr), nextOffset) != getchar((*first), nextOffset))) { done = true; break; } } while(++curr != finish); if(!done) ++nextOffset; } charOffset = nextOffset; } //String sorting recursive implementation template inline void StringSortRec(RandomAccessIter first, RandomAccessIter last, unsigned charOffset, std::vector &bin_cache , unsigned cache_offset, std::vector &bin_sizes) { //This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact. //Iterate to the end of the empties. If all empty, return while((*first).size() <= charOffset) { if(++first == last) return; } RandomAccessIter finish = last - 1; //Getting the last non-empty for(;(*finish).size() <= charOffset; --finish); ++finish; //Offsetting on identical characters. This section works a character at a time for optimal worst-case performance. UpdateOffset(first, finish, charOffset); const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8)); //This is bin_count/log(count) for 256 items, log(count) = 8, log(log(count)) = 3 //That's the worst-case runtime match between the two sorting approaches const unsigned max_size = bin_count >> 3; const unsigned membin_count = bin_count + 1; unsigned cache_end; RandomAccessIter * bins = SizeBins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1; //Calculating the size of each bin; this takes roughly 10% of runtime for (RandomAccessIter current = first; current != last; ++current) { if((*current).size() <= charOffset) { bin_sizes[0]++; } else bin_sizes[static_cast((*current)[charOffset]) + 1]++; } //Assign the bin positions bin_cache[cache_offset] = first; for(unsigned u = 0; u < membin_count - 1; u++) bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u]; //Swap into place RandomAccessIter nextbinstart = first; //handling empty bins RandomAccessIter * local_bin = &(bin_cache[cache_offset]); nextbinstart += bin_sizes[0]; RandomAccessIter * target_bin; //Iterating over each element in the bin of empties for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { //empties belong in this bin while((*current).size() > charOffset) { target_bin = bins + static_cast((*current)[charOffset]); iter_swap(current, (*target_bin)++); } } *local_bin = nextbinstart; //iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops unsigned last_bin = bin_count - 1; for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin); //This dominates runtime, mostly in the swap and bin lookups for(unsigned u = 0; u < last_bin; ++u) { local_bin = bins + u; nextbinstart += bin_sizes[u + 1]; //Iterating over each element in this bin for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { //Swapping elements in current into place until the correct element has been swapped in for(target_bin = bins + static_cast((*current)[charOffset]); target_bin != local_bin; target_bin = bins + static_cast((*current)[charOffset])) iter_swap(current, (*target_bin)++); } *local_bin = nextbinstart; } bins[last_bin] = last; //Recursing RandomAccessIter lastPos = bin_cache[cache_offset]; //Skip this loop for empties for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; //don't sort unless there are at least two items to compare if(count < 2) continue; //using std::sort if its worst-case is better if(count < max_size) std::sort(lastPos, bin_cache[u]); else StringSortRec(lastPos, bin_cache[u], charOffset + 1, bin_cache, cache_end, bin_sizes); } } //Sorts strings in reverse order, with empties at the end template inline void ReverseStringSortRec(RandomAccessIter first, RandomAccessIter last, unsigned charOffset, std::vector &bin_cache , unsigned cache_offset, std::vector &bin_sizes, Compare comp) { //This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact. RandomAccessIter curr = first; //Iterate to the end of the empties. If all empty, return while((*curr).size() <= charOffset) { if(++curr == last) return; } //Getting the last non-empty while((*(--last)).size() <= charOffset); ++last; //Offsetting on identical characters. This section works a character at a time for optimal worst-case performance. UpdateOffset(curr, last, charOffset); RandomAccessIter * target_bin; const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8)); //This is bin_count/log(count) for 256 items, log(count) = 8, log(log(count)) = 3 //That's the worst-case runtime match between the two sorting approaches const unsigned max_size = bin_count >> 3; const unsigned membin_count = bin_count + 1; const unsigned max_bin = bin_count - 1; unsigned cache_end; RandomAccessIter * bins = SizeBins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count); RandomAccessIter * end_bin = &(bin_cache[cache_offset + max_bin]); //Calculating the size of each bin; this takes roughly 10% of runtime for (RandomAccessIter current = first; current != last; ++current) { if((*current).size() <= charOffset) { bin_sizes[bin_count]++; } else bin_sizes[max_bin - static_cast((*current)[charOffset])]++; } //Assign the bin positions bin_cache[cache_offset] = first; for(unsigned u = 0; u < membin_count - 1; u++) bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u]; //Swap into place RandomAccessIter nextbinstart = last; //handling empty bins RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]); RandomAccessIter lastFull = *local_bin; //Iterating over each element in the bin of empties for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { //empties belong in this bin while((*current).size() > charOffset) { target_bin = end_bin - static_cast((*current)[charOffset]); iter_swap(current, (*target_bin)++); } } *local_bin = nextbinstart; nextbinstart = first; //iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops unsigned last_bin = max_bin; for(; last_bin && !bin_sizes[last_bin]; --last_bin); //This dominates runtime, mostly in the swap and bin lookups for(unsigned u = 0; u < last_bin; ++u) { local_bin = bins + u; nextbinstart += bin_sizes[u]; //Iterating over each element in this bin for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { //Swapping elements in current into place until the correct element has been swapped in for(target_bin = end_bin - static_cast((*current)[charOffset]); target_bin != local_bin; target_bin = end_bin - static_cast((*current)[charOffset])) iter_swap(current, (*target_bin)++); } *local_bin = nextbinstart; } bins[last_bin] = lastFull; //Recursing RandomAccessIter lastPos = first; //Skip this loop for empties for(unsigned u = cache_offset; u <= cache_offset + last_bin; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; //don't sort unless there are at least two items to compare if(count < 2) continue; //using std::sort if its worst-case is better if(count < max_size) std::sort(lastPos, bin_cache[u], comp); else ReverseStringSortRec(lastPos, bin_cache[u], charOffset + 1, bin_cache, cache_end, bin_sizes, comp); } } //String sorting recursive implementation template inline void StringSortRec(RandomAccessIter first, RandomAccessIter last, unsigned charOffset, std::vector &bin_cache , unsigned cache_offset, std::vector &bin_sizes, GetChar getchar, GetLength length) { //This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact. //Iterate to the end of the empties. If all empty, return while(length(*first) <= charOffset) { if(++first == last) return; } RandomAccessIter finish = last - 1; //Getting the last non-empty for(;length(*finish) <= charOffset; --finish); ++finish; UpdateOffset(first, finish, charOffset, getchar, length); const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8)); //This is bin_count/log(count) for 256 items, log(count) = 8, log(log(count)) = 3 //That's the worst-case runtime match between the two sorting approaches const unsigned max_size = bin_count >> 3; const unsigned membin_count = bin_count + 1; unsigned cache_end; RandomAccessIter * bins = SizeBins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1; //Calculating the size of each bin; this takes roughly 10% of runtime for (RandomAccessIter current = first; current != last; ++current) { if(length(*current) <= charOffset) { bin_sizes[0]++; } else bin_sizes[getchar((*current), charOffset) + 1]++; } //Assign the bin positions bin_cache[cache_offset] = first; for(unsigned u = 0; u < membin_count - 1; u++) bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u]; //Swap into place RandomAccessIter nextbinstart = first; //handling empty bins RandomAccessIter * local_bin = &(bin_cache[cache_offset]); nextbinstart += bin_sizes[0]; RandomAccessIter * target_bin; //Iterating over each element in the bin of empties for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { //empties belong in this bin while(length(*current) > charOffset) { target_bin = bins + getchar((*current), charOffset); iter_swap(current, (*target_bin)++); } } *local_bin = nextbinstart; //iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops unsigned last_bin = bin_count - 1; for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin); //This dominates runtime, mostly in the swap and bin lookups for(unsigned ii = 0; ii < last_bin; ++ii) { local_bin = bins + ii; nextbinstart += bin_sizes[ii + 1]; //Iterating over each element in this bin for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { //Swapping elements in current into place until the correct element has been swapped in for(target_bin = bins + getchar((*current), charOffset); target_bin != local_bin; target_bin = bins + getchar((*current), charOffset)) iter_swap(current, (*target_bin)++); } *local_bin = nextbinstart; } bins[last_bin] = last; //Recursing RandomAccessIter lastPos = bin_cache[cache_offset]; //Skip this loop for empties for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; //don't sort unless there are at least two items to compare if(count < 2) continue; //using std::sort if its worst-case is better if(count < max_size) std::sort(lastPos, bin_cache[u]); else StringSortRec(lastPos, bin_cache[u], charOffset + 1, bin_cache, cache_end, bin_sizes, getchar, length); } } //String sorting recursive implementation template inline void StringSortRec(RandomAccessIter first, RandomAccessIter last, unsigned charOffset, std::vector &bin_cache , unsigned cache_offset, std::vector &bin_sizes, GetChar getchar, GetLength length, Compare comp) { //This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact. //Iterate to the end of the empties. If all empty, return while(length(*first) <= charOffset) { if(++first == last) return; } RandomAccessIter finish = last - 1; //Getting the last non-empty for(;length(*finish) <= charOffset; --finish); ++finish; UpdateOffset(first, finish, charOffset, getchar, length); const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8)); //This is bin_count/log(count) for 256 items, log(count) = 8, log(log(count)) = 3 //That's the worst-case runtime match between the two sorting approaches const unsigned max_size = bin_count >> 3; const unsigned membin_count = bin_count + 1; unsigned cache_end; RandomAccessIter * bins = SizeBins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1; //Calculating the size of each bin; this takes roughly 10% of runtime for (RandomAccessIter current = first; current != last; ++current) { if(length(*current) <= charOffset) { bin_sizes[0]++; } else bin_sizes[getchar((*current), charOffset) + 1]++; } //Assign the bin positions bin_cache[cache_offset] = first; for(unsigned u = 0; u < membin_count - 1; u++) bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u]; //Swap into place RandomAccessIter nextbinstart = first; //handling empty bins RandomAccessIter * local_bin = &(bin_cache[cache_offset]); nextbinstart += bin_sizes[0]; RandomAccessIter * target_bin; //Iterating over each element in the bin of empties for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { //empties belong in this bin while(length(*current) > charOffset) { target_bin = bins + getchar((*current), charOffset); iter_swap(current, (*target_bin)++); } } *local_bin = nextbinstart; //iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops unsigned last_bin = bin_count - 1; for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin); //This dominates runtime, mostly in the swap and bin lookups for(unsigned u = 0; u < last_bin; ++u) { local_bin = bins + u; nextbinstart += bin_sizes[u + 1]; //Iterating over each element in this bin for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { //Swapping elements in current into place until the correct element has been swapped in for(target_bin = bins + getchar((*current), charOffset); target_bin != local_bin; target_bin = bins + getchar((*current), charOffset)) iter_swap(current, (*target_bin)++); } *local_bin = nextbinstart; } bins[last_bin] = last; //Recursing RandomAccessIter lastPos = bin_cache[cache_offset]; //Skip this loop for empties for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; //don't sort unless there are at least two items to compare if(count < 2) continue; //using std::sort if its worst-case is better if(count < max_size) std::sort(lastPos, bin_cache[u], comp); else StringSortRec(lastPos , bin_cache[u], charOffset + 1, bin_cache, cache_end, bin_sizes, getchar, length, comp); } } //Sorts strings in reverse order, with empties at the end template inline void ReverseStringSortRec(RandomAccessIter first, RandomAccessIter last, unsigned charOffset, std::vector &bin_cache , unsigned cache_offset, std::vector &bin_sizes, GetChar getchar, GetLength length, Compare comp) { //This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact. RandomAccessIter curr = first; //Iterate to the end of the empties. If all empty, return while(length(*curr) <= charOffset) { if(++curr == last) return; } //Getting the last non-empty while(length(*(--last)) <= charOffset); ++last; //Offsetting on identical characters. This section works a character at a time for optimal worst-case performance. UpdateOffset(first, last, charOffset, getchar, length); const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8)); //This is bin_count/log(count) for 256 items, log(count) = 8, log(log(count)) = 3 //That's the worst-case runtime match between the two sorting approaches const unsigned max_size = bin_count >> 3; const unsigned membin_count = bin_count + 1; const unsigned max_bin = bin_count - 1; unsigned cache_end; RandomAccessIter * bins = SizeBins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count); RandomAccessIter *end_bin = &(bin_cache[cache_offset + max_bin]); //Calculating the size of each bin; this takes roughly 10% of runtime for (RandomAccessIter current = first; current != last; ++current) { if(length(*current) <= charOffset) { bin_sizes[bin_count]++; } else bin_sizes[max_bin - getchar((*current), charOffset)]++; } //Assign the bin positions bin_cache[cache_offset] = first; for(unsigned u = 0; u < membin_count - 1; u++) bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u]; //Swap into place RandomAccessIter nextbinstart = last; //handling empty bins RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]); RandomAccessIter lastFull = *local_bin; RandomAccessIter * target_bin; //Iterating over each element in the bin of empties for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { //empties belong in this bin while(length(*current) > charOffset) { target_bin = end_bin - getchar((*current), charOffset); iter_swap(current, (*target_bin)++); } } *local_bin = nextbinstart; nextbinstart = first; //iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops unsigned last_bin = max_bin; for(; last_bin && !bin_sizes[last_bin]; --last_bin); //This dominates runtime, mostly in the swap and bin lookups for(unsigned u = 0; u < last_bin; ++u) { local_bin = bins + u; nextbinstart += bin_sizes[u]; //Iterating over each element in this bin for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) { //Swapping elements in current into place until the correct element has been swapped in for(target_bin = end_bin - getchar((*current), charOffset); target_bin != local_bin; target_bin = end_bin - getchar((*current), charOffset)) iter_swap(current, (*target_bin)++); } *local_bin = nextbinstart; } bins[last_bin] = lastFull; //Recursing RandomAccessIter lastPos = first; //Skip this loop for empties for(unsigned u = cache_offset; u <= cache_offset + last_bin; lastPos = bin_cache[u], ++u) { size_t count = bin_cache[u] - lastPos; //don't sort unless there are at least two items to compare if(count < 2) continue; //using std::sort if its worst-case is better if(count < max_size) std::sort(lastPos, bin_cache[u], comp); else ReverseStringSortRec(lastPos , bin_cache[u], charOffset + 1, bin_cache, cache_end, bin_sizes, getchar, length, comp); } } //Holds the bin vector and makes the initial recursive call template inline void StringSort(RandomAccessIter first, RandomAccessIter last, data_type, unsignedchar_type) { std::vector bin_sizes; std::vector bin_cache; StringSortRec(first, last, 0, bin_cache, 0, bin_sizes); } //Holds the bin vector and makes the initial recursive call template inline void ReverseStringSort(RandomAccessIter first, RandomAccessIter last, Compare comp, data_type, unsignedchar_type) { std::vector bin_sizes; std::vector bin_cache; ReverseStringSortRec(first, last, 0, bin_cache, 0, bin_sizes, comp); } //Holds the bin vector and makes the initial recursive call template inline void StringSort(RandomAccessIter first, RandomAccessIter last, GetChar getchar, GetLength length, data_type, unsignedchar_type) { std::vector bin_sizes; std::vector bin_cache; StringSortRec(first, last, 0, bin_cache, 0, bin_sizes, getchar, length); } //Holds the bin vector and makes the initial recursive call template inline void StringSort(RandomAccessIter first, RandomAccessIter last, GetChar getchar, GetLength length, Compare comp, data_type, unsignedchar_type) { std::vector bin_sizes; std::vector bin_cache; StringSortRec(first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp); } //Holds the bin vector and makes the initial recursive call template inline void ReverseStringSort(RandomAccessIter first, RandomAccessIter last, GetChar getchar, GetLength length, Compare comp, data_type, unsignedchar_type) { std::vector bin_sizes; std::vector bin_cache; ReverseStringSortRec(first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp); } //Allows character-type overloads template inline void string_sort(RandomAccessIter first, RandomAccessIter last, unsignedchar_type unused) { //Don't sort if it's too small to optimize if(last - first < MIN_SORT_SIZE) std::sort(first, last); else StringSort(first, last, *first, unused); } //Top-level sorting call; wraps using default of unsigned char template inline void string_sort(RandomAccessIter first, RandomAccessIter last) { unsigned char unused = '\0'; string_sort(first, last, unused); } //Allows character-type overloads template inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, Compare comp, unsignedchar_type unused) { //Don't sort if it's too small to optimize if(last - first < MIN_SORT_SIZE) std::sort(first, last, comp); else ReverseStringSort(first, last, comp, *first, unused); } //Top-level sorting call; wraps using default of unsigned char template inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, Compare comp) { unsigned char unused = '\0'; reverse_string_sort(first, last, comp, unused); } template inline void string_sort(RandomAccessIter first, RandomAccessIter last, GetChar getchar, GetLength length) { //Don't sort if it's too small to optimize if(last - first < MIN_SORT_SIZE) std::sort(first, last); else { //skipping past empties at the beginning, which allows us to get the character type //.empty() is not used so as not to require a user declaration of it while(!length(*first)) { if(++first == last) return; } StringSort(first, last, getchar, length, *first, getchar((*first), 0)); } } template inline void string_sort(RandomAccessIter first, RandomAccessIter last, GetChar getchar, GetLength length, Compare comp) { //Don't sort if it's too small to optimize if(last - first < MIN_SORT_SIZE) std::sort(first, last, comp); else { //skipping past empties at the beginning, which allows us to get the character type //.empty() is not used so as not to require a user declaration of it while(!length(*first)) { if(++first == last) return; } StringSort(first, last, getchar, length, comp, *first, getchar((*first), 0)); } } template inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, GetChar getchar, GetLength length, Compare comp) { //Don't sort if it's too small to optimize if(last - first < MIN_SORT_SIZE) std::sort(first, last, comp); else { //skipping past empties at the beginning, which allows us to get the character type //.empty() is not used so as not to require a user declaration of it while(!length(*(--last))) { //Note: if there is just one non-empty, and it's at the beginning, then it's already in sorted order if(first == last) return; } //making last just after the end of the non-empty part of the array ++last; ReverseStringSort(first, last, getchar, length, comp, *first, getchar((*first), 0)); } } #endif