/*
Generic version of spreadsort
Copyright (c) 2002-2008 Steven J. Ross

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/


#ifndef __SPREADSORT__
#define __SPREADSORT__
#include "Constants.h"
#include <algorithm>

template <typename T> 
struct Bin{
	size_t uCount;
	T * CurrentPosition;
};

//Checks whether two items are equal using the < operator
template<typename T>
inline bool
AreEquivalent(const T& lhs, const T& rhs) {
	return !(lhs < rhs) && !(rhs < lhs);
}

//A quick way to take a log base 2 of a number
template <typename T>
inline unsigned 
RoughLog2(const T& input) 
{
	unsigned cResult = 0;
	//The && is necessary on some compilers to avoid infinite loops; it doesn't significantly impair performance
	if(input >= 0)
		while((input >> cResult) && (cResult < (8*sizeof(T)))) cResult++;
	else
		while(((input >> cResult) < -1) && (cResult < (8*sizeof(T)))) cResult++;
	return cResult;
}

//This only works on unsigned data types
template <typename T>
inline unsigned 
RoughLog2Size(const T& input) 
{
	unsigned cResult = 0;
	//The && is necessary on some compilers to avoid infinite loops; it doesn't significantly impair performance
	while((input >> cResult) && (cResult < (8*sizeof(T)))) cResult++;
	return cResult;
}

//Gets the maximum size which we'll call spreadsort on
//Maintains both a minimum size to recurse and a check of distribution size versus count
//This is called for a set of bins, instead of bin-by-bin, to avoid performance overhead
template <typename T>
inline size_t
GetMaxCount(unsigned logRange, unsigned uCount)
{
	unsigned logSize = RoughLog2Size(uCount);
	unsigned uRelativeWidth = (LOG_CONST * logRange)/((logSize > MAX_SPLITS) ? MAX_SPLITS : logSize);
	//Don't try to bitshift more than the size of an element
	if((8*sizeof(T)) <= uRelativeWidth)
		uRelativeWidth = (8*sizeof(T)) - 1;
	return 1 << ((uRelativeWidth < (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT)) ? 
		(LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT) :  uRelativeWidth);
}

//Find the minimum and maximum
template <typename T>
void 
FindExtremes(T *Array, size_t uCount, T & piMax, T & piMin)
{
	size_t u = 1;
	piMin = piMax = Array[0];
	for(; u < uCount; ++u){
		if(piMax < Array[u])
			piMax=Array[u];
		else if(Array[u] < piMin)
			piMin= Array[u];
	}
}

//The sorting implementation
template <typename T>
Bin<T> *
SpreadSortCore(T *Array, size_t uCount, size_t & uBinCount, T &iMax, T &iMin)
{
	//This step is roughly 10% of runtime, but it helps avoid worst-case behavior and improve behavior with real data
	//If you know the maximum and minimum ahead of time, you can pass those values in and skip this step for the first iteration
	FindExtremes(Array, uCount, iMax, iMin);
	if(AreEquivalent(iMax, iMin))
		return NULL;
	long divMin,divMax;
	size_t u;
	int LogDivisor;
	Bin<T> * BinArray;
	Bin<T> * CurrentBin;
	unsigned logRange;
	logRange = RoughLog2Size((size_t)(iMax >> 0) - (iMin >> 0));
	if((LogDivisor = logRange - RoughLog2Size(uCount) + LOG_MEAN_BIN_SIZE) < 0)
		LogDivisor = 0;
	//The below if statement is only necessary on systems with high memory latency relative to their processor speed
	//Otherwise known as modern processors
	if((logRange - LogDivisor) > MAX_SPLITS)
		LogDivisor = logRange - MAX_SPLITS;
	divMin = iMin >> LogDivisor;
	divMax = iMax >> LogDivisor;
	uBinCount = divMax - divMin + 1;
	
	//Allocate the bins and determine their sizes
	BinArray = (Bin<T> *) calloc(uBinCount, sizeof(Bin<T>));
	//Memory allocation failure check and clean return with sorted results
    if(!BinArray) {
		printf("Using std::sort because of memory allocation failure\n");
		std::sort(Array, Array + uCount);
		return NULL;
	}
		
	//Calculating the size of each bin; this takes roughly 10% of runtime
	for(u = 0; u < uCount; ++u)
		BinArray[(Array[u] >> LogDivisor) - divMin].uCount++;
	//Assign the bin positions
	BinArray[0].CurrentPosition = Array;
	for(u = 0; u < uBinCount - 1; u++) {
		BinArray[u + 1].CurrentPosition = BinArray[u].CurrentPosition + BinArray[u].uCount;
		BinArray[u].uCount = BinArray[u].CurrentPosition - Array;
	}
	BinArray[u].uCount = BinArray[u].CurrentPosition - Array;
	
	//Swap into place
	//This dominates runtime, especially in the swap; std::sort calls are the other main time-user
	for(u = 0; u < uCount; ++u) {
		for(CurrentBin = (BinArray + ((Array[u] >> LogDivisor) - divMin));  (CurrentBin->uCount > u); 
			CurrentBin = BinArray + ((Array[u] >> LogDivisor) - divMin))
				std::swap(Array[u], *(CurrentBin->CurrentPosition++));
		//Now that we've found the item belonging in this position, increment the bucket count
		if(CurrentBin->CurrentPosition == Array + u)
			++(CurrentBin->CurrentPosition);
	}
	
	//If we've bucketsorted, the array is sorted and we should skip recursion
	if(!LogDivisor) {
		free(BinArray);
		return NULL;
	}
	return BinArray;
}

//Recursive sorting wrapper declaration
template <typename T>
void SpreadSortRec(T *Array, size_t uCount);

//Decides whether to recurse or use std::sort
template <typename T>
void
SpreadSortBins(T *Array, size_t uCount, size_t uBinCount, const T &iMax
				, const T &iMin, Bin<T> * BinArray, size_t uMaxCount)
{
	size_t u;
	for(u = 0; u < uBinCount; u++) {
		size_t count = (BinArray[u].CurrentPosition - Array) - BinArray[u].uCount;
		//don't sort unless there is at least two items to compare
		if(count < 2)
			continue;
		if(count < uMaxCount)
			std::sort(Array + BinArray[u].uCount, BinArray[u].CurrentPosition);
		else
			SpreadSortRec(Array + BinArray[u].uCount, count);
	}
	free(BinArray);
}

//Implementation for recursive sorting
template <typename T>
void 
SpreadSortRec(T *Array, size_t uCount)
{
	T iMax, iMin;
	size_t uBinCount;
	Bin<T> * BinArray = SpreadSortCore(Array, uCount, uBinCount, iMax, iMin);
	if(!BinArray)
		return;
	SpreadSortBins(Array, uCount, uBinCount, iMax, iMin, BinArray, GetMaxCount<T>(RoughLog2Size((size_t)(iMax >> 0)-(iMin >> 0)), uCount));
}

//Top-level sorting call
template <typename T>
void 
integer_sort(T *Array, size_t uCount) 
{
	//Don't sort if it's too small to optimize
	if(uCount < MIN_SORT_SIZE) {
		std::sort(Array, Array + uCount);
		return;
	}
		
	size_t uBinCount;
	T iMax, iMin;
	Bin<T> * BinArray = SpreadSortCore(Array, uCount, uBinCount, iMax, iMin);
	if(!BinArray)
		return;
	SpreadSortBins(Array, uCount, uBinCount, iMax, iMin, BinArray, 1 << (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT));
}

#endif