/* Generic version of spreadsort Copyright (c) 2002-2008 Steven J. Ross Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __SPREADSORT__ #define __SPREADSORT__ #include "Constants.h" #include template struct Bin{ size_t uCount; T * CurrentPosition; }; //A quick way to take a log base 2 of a number template inline unsigned RoughLog2(const T& input) { unsigned cResult = 0; //The && is necessary on some compilers to avoid infinite loops; it doesn't significantly impair performance if(input >= 0) while((input >> cResult) && (cResult < (8*sizeof(T)))) cResult++; else while(((input >> cResult) < -1) && (cResult < (8*sizeof(T)))) cResult++; return cResult; } //This only works on unsigned data types template inline unsigned RoughLog2Size(const T& input) { unsigned cResult = 0; //The && is necessary on some compilers to avoid infinite loops; it doesn't significantly impair performance while((input >> cResult) && (cResult < (8*sizeof(T)))) cResult++; return cResult; } //Gets the maximum size which we'll call spreadsort on //Maintains both a minimum size to recurse and a check of distribution size versus count //This is called for a set of bins, instead of bin-by-bin, to avoid performance overhead template inline size_t GetMaxCount(unsigned logRange, unsigned uCount) { unsigned logSize = RoughLog2Size(uCount); unsigned uRelativeWidth = (LOG_CONST * logRange)/((logSize > MAX_SPLITS) ? MAX_SPLITS : logSize); //Don't try to bitshift more than the size of an element if((8*sizeof(T)) <= uRelativeWidth) uRelativeWidth = (8*sizeof(T)) - 1; return 1 << ((uRelativeWidth < (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT)) ? (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT) : uRelativeWidth); } //Find the minimum and maximum template void FindExtremes(T *Array, size_t uCount, T & piMax, T & piMin) { size_t u = 1; piMin = piMax = Array[0]; for(; u < uCount; ++u){ if(Array[u] > piMax) piMax=Array[u]; else if(Array[u] < piMin) piMin= Array[u]; } } //The sorting implementation template Bin * SpreadSortCore(T *Array, size_t uCount, size_t & uBinCount, T &iMax, T &iMin) { //This step is roughly 10% of runtime, but it helps avoid worst-case behavior and improve behavior with real data //If you know the maximum and minimum ahead of time, you can pass those values in and skip this step for the first iteration FindExtremes(Array, uCount, iMax, iMin); if(iMax == iMin) return NULL; T divMin,divMax; size_t u; int LogDivisor; Bin * BinArray; Bin * CurrentBin; unsigned logRange; logRange = RoughLog2Size((size_t)iMax-iMin); if((LogDivisor = logRange - RoughLog2Size(uCount) + LOG_MEAN_BIN_SIZE) < 0) LogDivisor = 0; //The below if statement is only necessary on systems with high memory latency relative to their processor speed //Otherwise known as modern processors if((logRange - LogDivisor) > MAX_SPLITS) LogDivisor = logRange - MAX_SPLITS; divMin = iMin >> LogDivisor; divMax = iMax >> LogDivisor; uBinCount = divMax - divMin + 1; //Allocate the bins and determine their sizes BinArray = (Bin *) calloc(uBinCount, sizeof(Bin)); //Memory allocation failure check and clean return with sorted results if(!BinArray) { printf("Using std::sort because of memory allocation failure\n"); std::sort(Array, Array + uCount); return NULL; } //Calculating the size of each bin; this takes roughly 10% of runtime for(u = 0; u < uCount; ++u) BinArray[(Array[u] >> LogDivisor) - divMin].uCount++; //Assign the bin positions BinArray[0].CurrentPosition = Array; for(u = 0; u < uBinCount - 1; u++) { BinArray[u + 1].CurrentPosition = BinArray[u].CurrentPosition + BinArray[u].uCount; BinArray[u].uCount = BinArray[u].CurrentPosition - Array; } BinArray[u].uCount = BinArray[u].CurrentPosition - Array; //Swap into place //This dominates runtime, especially in the swap; std::sort calls are the other main time-user for(u = 0; u < uCount; ++u) { for(CurrentBin = (BinArray + ((Array[u] >> LogDivisor) - divMin)); (CurrentBin->uCount > u); CurrentBin = BinArray + ((Array[u] >> LogDivisor) - divMin)) std::swap(Array[u], *(CurrentBin->CurrentPosition++)); //Now that we've found the item belonging in this position, increment the bucket count if(CurrentBin->CurrentPosition == Array + u) ++(CurrentBin->CurrentPosition); } //If we've bucketsorted, the array is sorted and we should skip recursion if(!LogDivisor) { free(BinArray); return NULL; } return BinArray; } //Recursive sorting wrapper declaration template void SpreadSortRec(T *Array, size_t uCount); //Decides whether to recurse or use std::sort template void SpreadSortBins(T *Array, size_t uCount, size_t uBinCount, const T &iMax , const T &iMin, Bin * BinArray, size_t uMaxCount) { size_t u; for(u = 0; u < uBinCount; u++) { size_t count = (BinArray[u].CurrentPosition - Array) - BinArray[u].uCount; //don't sort unless there is at least two items to compare if(count < 2) continue; if(count < uMaxCount) std::sort(Array + BinArray[u].uCount, BinArray[u].CurrentPosition); else SpreadSortRec(Array + BinArray[u].uCount, count); } free(BinArray); } //Implementation for recursive sorting template void SpreadSortRec(T *Array, size_t uCount) { T iMax, iMin; size_t uBinCount; Bin * BinArray = SpreadSortCore(Array, uCount, uBinCount, iMax, iMin); if(!BinArray) return; SpreadSortBins(Array, uCount, uBinCount, iMax, iMin, BinArray, GetMaxCount(RoughLog2Size((size_t)iMax-iMin), uCount)); } //Top-level sorting call template void integer_sort(T *Array, size_t uCount) { //Don't sort if it's too small to optimize if(uCount < MIN_SORT_SIZE) { std::sort(Array, Array + uCount); return; } size_t uBinCount; T iMax, iMin; Bin * BinArray = SpreadSortCore(Array, uCount, uBinCount, iMax, iMin); if(!BinArray) return; SpreadSortBins(Array, uCount, uBinCount, iMax, iMin, BinArray, 1 << (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT)); } #endif