/* Generic version of spreadsort Copyright (c) 2002-2008 Steven J. Ross Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __SPREADSORT__ #define __SPREADSORT__ #include "constants.h" #include template struct Bin{ size_t uCount; _RandomAccessIter CurrentPosition; }; //This only works on unsigned data types template inline unsigned RoughLog2Size(const T& input) { unsigned cResult = 0; //The && is necessary on some compilers to avoid infinite loops; it doesn't significantly impair performance while((input >> cResult) && (cResult < (8*sizeof(T)))) cResult++; return cResult; } //Gets the maximum size which we'll call spreadsort on //Maintains both a minimum size to recurse and a check of distribution size versus count //This is called for a set of bins, instead of bin-by-bin, to avoid performance overhead inline size_t GetMaxCount(unsigned logRange, unsigned uCount) { unsigned logSize = RoughLog2Size(uCount); unsigned uRelativeWidth = (LOG_CONST * logRange)/((logSize > MAX_SPLITS) ? MAX_SPLITS : logSize); //Don't try to bitshift more than the size of an element if((8*sizeof(size_t)) <= uRelativeWidth) uRelativeWidth = (8*sizeof(size_t)) - 1; return 1 << ((uRelativeWidth < (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT)) ? (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT) : uRelativeWidth); } //Find the minimum and maximum template inline void FindExtremes(_RandomAccessIter current, _RandomAccessIter __last, _RandomAccessIter & piMax, _RandomAccessIter & piMin) { piMin = piMax = current; //Start from the second item, as max and min are initialized to the first while(++current < __last) { if(*piMax < *current) piMax = current; else if(*current < *piMin) piMin = current; } } //The sorting implementation template inline Bin<_RandomAccessIter> * SpreadSortCore(_RandomAccessIter __first, const _RandomAccessIter __last, size_t & uBinCount, unsigned & logRange) { //This step is roughly 10% of runtime, but it helps avoid worst-case behavior and improve behavior with real data //If you know the maximum and minimum ahead of time, you can pass those values in and skip this step for the first iteration _RandomAccessIter iMax, iMin; FindExtremes(__first, __last, iMax, iMin); //iMax and iMin will be the same (the first item) iff all values are equivalent if(iMax == iMin) return NULL; size_t u; int LogDivisor; Bin<_RandomAccessIter> * BinArray; Bin<_RandomAccessIter> * CurrentBin; unsigned uCount = __last - __first; logRange = RoughLog2Size((size_t)(*iMax >> 0) - (*iMin >> 0)); if((LogDivisor = logRange - RoughLog2Size(uCount)) <= 0) LogDivisor = 0; else LogDivisor += LOG_MEAN_BIN_SIZE; //The below if statement is only necessary on systems with high memory latency relative to their processor speed //Otherwise known as modern processors if((logRange - LogDivisor) > MAX_SPLITS) LogDivisor = logRange - MAX_SPLITS; divType divMin = *iMin >> LogDivisor; divType divMax = *iMax >> LogDivisor; uBinCount = divMax - divMin + 1; //Allocate the bins and determine their sizes BinArray = (Bin<_RandomAccessIter> *) calloc(uBinCount, sizeof(Bin<_RandomAccessIter>)); //Memory allocation failure check and clean return with sorted results if(!BinArray) { printf("Using std::sort because of memory allocation failure\n"); std::sort(__first, __last); return NULL; } //Calculating the size of each bin; this takes roughly 10% of runtime for (_RandomAccessIter current = __first; current != __last;) BinArray[(*(current++) >> LogDivisor) - divMin].uCount++; //Assign the bin positions BinArray[0].CurrentPosition = __first; for(u = 0; u < uBinCount - 1; u++) { BinArray[u + 1].CurrentPosition = BinArray[u].CurrentPosition + BinArray[u].uCount; BinArray[u].uCount = BinArray[u].CurrentPosition - __first; } BinArray[u].uCount = BinArray[u].CurrentPosition - __first; //Swap into place //This dominates runtime, especially in the swap; std::sort calls are the other main time-user //Note that this loop uses __first, so __first no longer points to the first element for(unsigned u = 0; __first != __last; ++u) { for(CurrentBin = (BinArray + ((*__first >> LogDivisor) - divMin)); (CurrentBin->uCount > u); CurrentBin = BinArray + ((*__first >> LogDivisor) - divMin)) std::swap(*__first, *(CurrentBin->CurrentPosition++)); //Now that we've found the item belonging in this position, increment the bucket count if(CurrentBin->CurrentPosition == __first++) ++(CurrentBin->CurrentPosition); } //If we've bucketsorted, the array is sorted and we should skip recursion if(!LogDivisor) { free(BinArray); return NULL; } return BinArray; } //Recursive sorting wrapper declaration template void SpreadSortRec(_RandomAccessIter __first, _RandomAccessIter __last); //Decides whether to recurse or use std::sort template inline void SpreadSortBins(_RandomAccessIter __first, _RandomAccessIter __last, size_t uBinCount, Bin<_RandomAccessIter> * BinArray, size_t uMaxCount) { size_t u; for(u = 0; u < uBinCount; u++) { size_t count = (BinArray[u].CurrentPosition - __first) - BinArray[u].uCount; //don't sort unless there are at least two items to compare if(count < 2) continue; if(count < uMaxCount) std::sort(__first + BinArray[u].uCount, BinArray[u].CurrentPosition); else SpreadSortRec<_RandomAccessIter, divType>(__first + BinArray[u].uCount, BinArray[u].CurrentPosition); } free(BinArray); } //Implementation for recursive sorting template inline void SpreadSortRec(_RandomAccessIter __first, _RandomAccessIter __last) { size_t uBinCount; unsigned logRange; Bin<_RandomAccessIter> * BinArray = SpreadSortCore<_RandomAccessIter, divType>(__first, __last, uBinCount, logRange); if(!BinArray) return; SpreadSortBins<_RandomAccessIter, divType>(__first, __last, uBinCount, BinArray, GetMaxCount((logRange), __last - __first)); } //Implementation for recursive sorting template inline void SpreadSort(_RandomAccessIter __first, _RandomAccessIter __last, divType) { size_t uBinCount; unsigned logRange; Bin<_RandomAccessIter> * BinArray = SpreadSortCore<_RandomAccessIter,divType>(__first, __last, uBinCount, logRange); if(!BinArray) return; SpreadSortBins<_RandomAccessIter, divType>(__first, __last, uBinCount, BinArray, 1 << (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT)); } //Top-level sorting call template inline void integer_sort(_RandomAccessIter __first, _RandomAccessIter __last) { //Don't sort if it's too small to optimize if(__last - __first < MIN_SORT_SIZE) std::sort(__first, __last); else SpreadSort(__first, __last, *__first >> 0); } #endif