#!/usr/bin/perl -w
$usage = "usage: ./tune.pl [-help] [-real] [-skip_verify] [-verbose] [-firstdim] [-multiple_iterations] [fileSize]\n";
#Default settings for the tuning run, followed by command-line handling.
#Sorting is tested on 40 million elements by default.
#Don't test on fewer than 2^22 (4 million) elements, as that is the minimum
#for MAX_SPLITS of 11 to be efficient.
$defFileSize = 40000000;
#first argument handed to the sort executable (presumably a repeat count -- confirm against the executable)
$loopCount = 1;
#1 = use unweighted times from the reduced set of tests only (see -real)
$realtimes = 0;
#1 = compare each tuned run's output against a "-std" run
$verifycorrect = 1;
#1 = print the time measured for every candidate value
$verbose = 0;
#make target and executable that is rebuilt and timed
$exename = "spreadsort";
#upper bound on the number of passes over the tuned constants
$iter_count = 1;

foreach my $currArg (@ARGV) {
	if ($currArg eq "-help") {
		print STDERR $usage;
		exit(0);
	#verification roughly doubles the runtime of this script, but it does make sure that results are correct
	} elsif ($currArg eq "-skip_verify") {
		$verifycorrect = 0;
	#use real times only, don't use weighting and special-case tests; this saves about 5/6 of the script runtime
	#but results are substantially different
	} elsif ($currArg eq "-real") {
		$realtimes = 1;
	} elsif ($currArg eq "-verbose") {
		$verbose = 1;
	} elsif ($currArg eq "-firstdim") {
		$exename = "firstdim";
	#runs until we converge on a precise set of values; defaults off because of runtime
	} elsif ($currArg eq "-multiple_iterations") {
		$iter_count = 4;
	} elsif ($currArg =~ /^-/) {
		#an unrecognized option is an error: exit non-zero so calling scripts can detect it
		print STDERR "Unknown option: $currArg\n", $usage;
		exit(1);
	} elsif ($currArg =~ /^\d+$/ and $currArg > 0) {
		#a bare positive integer is the number of elements to sort; the last one given wins
		$defFileSize = $currArg;
	} else {
		#anything else would be interpolated into shell commands and used in arithmetic, so refuse it
		print STDERR "Invalid fileSize: $currArg\n", $usage;
		exit(1);
	}
}
$fileSize = $defFileSize;

#Starting points for the tuned constants; these are reasonable values
$max_splits = 9;
$log_const = 3;
$log_mean_bin_size = 0;
$log_min_size = 8;

#this value is a minimum to get decent performance of file I/O
$min_sort_size = 100;
#extra argument for the sort executable: "" for the tuned sort, "-std" for the std::sort comparison run
$std = "";

#Build the data generator used by SumTimes.  A failure is reported but not fatal,
#because a previously built ./randomgen may still be usable.
my $make_status = system("make randomgen > /dev/null");
if ($make_status != 0) {
	print STDERR "Warning: 'make randomgen' failed (status $make_status)\n";
}
#Tune each constant in turn and repeat while any value changed, for at most
#$iter_count passes (1 by default, 4 with -multiple_iterations)
$changed = 1;
my $ii = 0;
for($ii = 0; $changed and $ii < $iter_count; $ii++) {
	#TuneVariable sets $changed back to 1 whenever it picks a value different from the one it started with
	$changed = 0;
	print STDOUT "Tuning max_splits\n";
	TuneVariable(\$max_splits, 7, 20);
	print STDOUT "Tuning log_const\n";
	TuneVariable(\$log_const, 1, 10);
	#the two ranges below are bounded by each other so that log_min_size stays above log_mean_bin_size
	print STDOUT "Tuning log of the minimum count for recursion\n";
	TuneVariable(\$log_min_size, $log_mean_bin_size + 1, 16);
	print STDOUT "Tuning log_mean_bin_size\n";
	TuneVariable(\$log_mean_bin_size, 0, $log_min_size - 1);
}

#After optimizations for large datasets are complete, see how small of a dataset can be sped up
print STDOUT "Tuning minimum sorting size\n";
TuneMinSize();
print STDOUT "Writing results\n";

#Doing a final run with the final settings to compare sort times,
#also verifying correctness of results for the last run.
#TuneMinSize changes $loopCount, $fileSize and $std, so they are reset here.
$verifycorrect = 1;
$loopCount = 1;
$fileSize = $defFileSize;
$std = "";
my $lastTime = SumTimes();
$std = "-std";
my $baseTime = SumTimes();
print STDOUT "std::sort time: $baseTime\n";
print STDOUT "Spreadsort time: $lastTime\n";

#Writes the current tuning values to Constants.h and rebuilds the sort executable.
#Takes no arguments.  Reads the globals $log_mean_bin_size, $max_splits,
#$log_min_size, $min_sort_size, $log_const and $exename.
#Dies if the header cannot be removed, opened, or completely written.
#A failing build is reported on STDERR but does not stop the script.
sub WriteConstants {
	my $const_file = 'Constants.h';

	# deleting the file; one that does not exist yet (first run) is not an error
	if (-e $const_file and not unlink $const_file) {
		die "$0: could not unlink $const_file: $!\n";
	}

	# writing the results back to the original file name
	open(my $constants, '>', $const_file)
		or die "$0: Can't open output file: $const_file: $!\n";

	#the header stores the split count relative to the mean bin size
	my $log_min_split_count = $log_min_size - $log_mean_bin_size;

	print {$constants}
		"//Tuning constants\n",
		"//Sets the minimum number of items per bin.\n",
		"static const unsigned LOG_MEAN_BIN_SIZE = $log_mean_bin_size;\n",
		"//This should be tuned to your processor cache; if you go too large you get cache misses on bins\n",
		"//The smaller this number, the less worst-case memory usage.  If too small, too many recursions slow down Spreadsort\n",
		"static const unsigned MAX_SPLITS = $max_splits;\n",
		"//Used to force a comparison-based sorting for small bins, if it's faster.  Minimum value 0\n",
		"static const unsigned LOG_MIN_SPLIT_COUNT = $log_min_split_count;\n",
		"//There is a minimum size below which it is not worth using Spreadsort\n",
		"static const unsigned MIN_SORT_SIZE = $min_sort_size;\n",
		"//This is the constant on the log base n of m calculation; make this larger the faster std::sort is relative to spreadsort\n",
		"static const unsigned LOG_CONST = $log_const;\n";

	#buffered write errors only surface at close, so check it
	close($constants)
		or die "$0: error writing $const_file: $!\n";

	#rebuild so that the next timing run uses the constants just written
	my $make_status = system("make $exename > /dev/null");
	if ($make_status != 0) {
		print STDERR "Warning: 'make $exename' failed (status $make_status); timings may come from a stale build\n";
	}
	return;
}

#Runs the sort executable once and returns the time it reports.
#Takes no arguments.  Reads the globals $exename, $loopCount, $std and $verifycorrect.
#The executable's output is captured in time.txt.  The returned value is the text
#after the word "time" on the first output line containing it, with surrounding
#whitespace removed (presumably a number of seconds -- confirm against the executable).
#When $std is empty and $verifycorrect is set, a second "-std" run is made and
#SpreadSortOut.txt is compared with StandardSortOut.txt using diff.
#Dies if time.txt cannot be opened or holds no "time" line, so that a failed
#run is never counted as a time of zero.
sub CheckTime {
	my $time_file = "time.txt";
	#use the line below on systems that can't overwrite.
	#system("rm -f $time_file");
	system("./$exename $loopCount $std > $time_file");
	#verifying correctness
	if(not $std and $verifycorrect) {
		system("./$exename $loopCount -std > /dev/null");
		#diff prints any differences itself; the status is checked so a mismatch is flagged explicitly
		my $diff_status = system("diff SpreadSortOut.txt StandardSortOut.txt\n");
		if ($diff_status != 0) {
			print STDERR "Warning: verification failed: diff of SpreadSortOut.txt and StandardSortOut.txt returned status $diff_status\n";
		}
	}
	open(my $time_fh, '<', $time_file)
		or die "Could not open file: $time_file: $!\n";
	my $reported;
	while (my $line = <$time_fh>) {
		#the string "time" is used by split as a pattern; the field after its first occurrence is the value
		my @parts = split("time", $line);
		if (@parts > 1) {
			$reported = $parts[1];
			last;
		}
	}
	close($time_fh);
	if (not defined $reported) {
		die "No time reported by ./$exename in $time_file\n";
	}
	#drop the trailing newline and any padding; the numeric value is unchanged
	$reported =~ s/^\s+|\s+$//g;
	return $reported;
}

#Generates a series of test inputs with ./randomgen, times the sort on each one
#with CheckTime, and returns the (optionally weighted) sum of the times.
#Takes no arguments.  Reads the globals $realtimes and $fileSize.
#With $realtimes set, only levels 8..16 are run, one test each and unweighted.
#Otherwise levels 1..16 are run with three tests per level.
sub SumTimes {
	#if we're only using real times, don't bother with the corner-cases
	my $first_level = $realtimes ? 8 : 1;
	my $total = 0;
	foreach my $level ($first_level .. 16) {
		#each entry: the two leading randomgen arguments and the weight applied to the measured time
		my @runs;
		if ($realtimes) {
			@runs = ([$level, $level, 1]);
		} else {
			#tests with higher levels of randomness are given higher priority in timing results
			@runs = (
				[$level, $level, 2 * $level],
				[0, $level, $level],
				[$level, 0, $level],
			);
		}
		foreach my $run (@runs) {
			my ($first_arg, $second_arg, $weight) = @$run;
			system("./randomgen $first_arg $second_arg $fileSize");
			$total += $weight * CheckTime();
		}
	}
	return $total;
}

#Tries every integer value in a range for one tuning variable and keeps the fastest.
#  $tunevar  - reference to the global being tuned; it is assigned through the
#              reference so that WriteConstants sees each candidate value
#  $beginval - first value to try
#  $endval   - last value to try (inclusive)
#For each candidate the constants are rewritten and rebuilt (WriteConstants) and
#timed (SumTimes).  On return the variable holds the best value found, or its
#original value if the range is empty.  Sets the global $changed to 1 when the
#best value differs from the starting value.
#Note: Constants.h is left holding the last value tried, not the best one; the
#chosen value is picked up by the next WriteConstants call.
sub TuneVariable {
	my ($tunevar, $beginval, $endval) = @_;
	my $startval = $$tunevar;
	my $best_val = $startval;
	#undef until the first candidate has been timed, so that a measured total of 0 is still a valid best time
	my $besttime;
	for ($$tunevar = $beginval; $$tunevar <= $endval; $$tunevar++) {
		WriteConstants();
		my $sumtime = SumTimes();
		#If this value is better, use it.  If this is the start value and it's just as good, use the startval
		if (not defined $besttime
			or $sumtime < $besttime
			or ($sumtime == $besttime and $$tunevar == $startval)) {
			$besttime = $sumtime;
			$best_val = $$tunevar;
		}
		print STDOUT "Value: $$tunevar Time: $sumtime\n" if $verbose;
	}
	$$tunevar = $best_val;
	print STDOUT "Best Value: $best_val\n";
	if($best_val != $startval) {
		$changed = 1;
	}
	return;
}

#Finds the smallest input size at which the tuned sort beats the "-std" run.
#Takes no arguments.  Starting from the current $min_sort_size, the size is
#doubled until the "-std" time exceeds the tuned time or the size passes $defFileSize.
#Each candidate is written to Constants.h via WriteConstants.
#Side effects: leaves $min_sort_size at the selected size and overwrites the
#globals $loopCount, $fileSize and $std; callers must reset those afterwards.
sub TuneMinSize {
	for (; $min_sort_size <= $defFileSize; $min_sort_size *= 2) {
		#scale the count so count * size stays near $defFileSize; truncate because the
		#division is often fractional and the value is passed on a command line
		$loopCount = int($defFileSize / $min_sort_size);
		$fileSize = $min_sort_size;
		WriteConstants();
		$std = "";
		my $sumtime = SumTimes();
		$std = "-std";
		my $stdtime = SumTimes();
		print STDOUT "Size: $min_sort_size Spreadsort Time: $sumtime std::sort Time: $stdtime\n";
		#stop at the first size where the tuned sort is faster
		last if($stdtime > $sumtime);
	}
	return;
}