#ifndef BOOST_SPREAD_SORT_H
#define BOOST_SPREAD_SORT_H
#include <algorithm>
#include <vector>
#include <cstring>
#ifndef BOOST_SPREADSORT_CONSTANTS
#define BOOST_SPREADSORT_CONSTANTS
namespace boost {
namespace detail {
//Tuning constants
//All of these are base-2 logarithms (or plain counts) consumed by get_log_divisor and get_max_count.
//Sets the minimum number of items per bin.
//get_log_divisor adds this to the shift it returns (fewer, fuller bins) and get_max_count subtracts it from log2(count).
static const unsigned LOG_MEAN_BIN_SIZE = 2;
//Log2 of the largest number of bins a single pass may create; get_log_divisor raises the shift until this holds.
//This should be tuned to your processor cache; if you go too large you get cache misses on bins
//The smaller this number, the less worst-case memory usage. If too small, too many recursions slow down spreadsort
static const unsigned MAX_SPLITS = 10;
//Used to force a comparison-based sorting for small bins, if it's faster. Minimum value 0
//get_max_count never returns less than 2 to the (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT).
static const unsigned LOG_MIN_SPLIT_COUNT = 5;
//There is a minimum size below which it is not worth using spreadsort
//NOTE(review): nothing in the visible part of this file reads MIN_SORT_SIZE - confirm whether it is still needed.
static const long MIN_SORT_SIZE = 1;
//This is the constant on the log base n of m calculation; make this larger the faster std::sort is relative to spreadsort
//get_max_count multiplies the remaining log range by this value.
static const unsigned LOG_CONST = 2;
}
}
#endif
namespace boost {
namespace detail {
//Returns the number of bits needed to represent input: 0 for 0, otherwise floor(log2(input)) + 1.
//The result never exceeds the bit width of T.
//This only works on unsigned data types
template <typename T>
inline unsigned
rough_log_2_size(const T& input)
{
  const unsigned bit_width = 8*sizeof(T);
  unsigned result = 0;
  //The width check has to be evaluated first: shifting by bit_width or more is undefined behaviour,
  //and that shift would otherwise be reached for every value whose top bit is set.
  while((result < bit_width) && (input >> result)) ++result;
  return result;
}
//Gets the maximum size which we'll call spread_sort on to control worst-case performance
//Maintains both a minimum size to recurse and a check of distribution size versus count
//This is called for a set of bins, instead of bin-by-bin, to avoid performance overhead
//log_range: log2 of the key range still to be sorted; count: number of elements in the set of bins.
//Returns a power of two, never smaller than 2 to the (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT);
//callers hand bins with fewer elements than this to std::sort.
inline size_t
get_max_count(unsigned log_range, size_t count)
{
  unsigned divisor = rough_log_2_size(count);
  //Making sure the divisor is positive
  if(divisor > LOG_MEAN_BIN_SIZE)
    divisor -= LOG_MEAN_BIN_SIZE;
  else
    divisor = 1;
  unsigned relative_width = (LOG_CONST * log_range)/((divisor > MAX_SPLITS) ? MAX_SPLITS : divisor);
  //Don't try to bitshift more than the size of an element
  if((8*sizeof(size_t)) <= relative_width)
    relative_width = (8*sizeof(size_t)) - 1;
  const unsigned min_width = LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT;
  const unsigned width = (relative_width < min_width) ? min_width : relative_width;
  //Shift a size_t one, not an int: width can be as large as 8*sizeof(size_t) - 1,
  //which overflows (and is undefined behaviour for) a plain int shift.
  return static_cast<size_t>(1) << width;
}
//Find the minimum and maximum using <
//Scans [current, last) once; on return max refers to the first largest element and min to the first
//smallest one. Both refer to the first element when every element compares equivalent.
template <class RandomAccessIter>
inline void
find_extremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min)
{
  //Seed both results with the first item, then examine the remaining ones
  max = current;
  min = current;
  for(++current; current < last; ++current) {
    if(*max < *current) {
      max = current;
      continue;
    }
    //An item that is a new maximum cannot also be a new minimum, so this is only checked otherwise
    if(*current < *min)
      min = current;
  }
}
//Uses a user-defined comparison operator to find minimum and maximum
//comp(a, b) plays the role of a < b: max ends up on the first element that nothing before it is
//"less than", min on the first element that is "less than" everything before it.
template <class RandomAccessIter, class compare>
inline void
find_extremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min, compare comp)
{
  //Seed both results with the first item, then examine the remaining ones
  max = current;
  min = current;
  for(++current; current < last; ++current) {
    if(comp(*max, *current)) {
      max = current;
      continue;
    }
    if(comp(*current, *min))
      min = current;
  }
}
//Gets a non-negative right bit shift to operate as a logarithmic divisor
//count: number of elements being sorted; log_range: log2 of the spread between the largest and smallest key.
//A result of 0 means one pass places every distinct key in its own bin.
inline int
get_log_divisor(size_t count, unsigned log_range)
{
  int divisor_bits = log_range - rough_log_2_size(count);
  //If we can finish in one iteration without exceeding either (2 to the MAX_SPLITS) or n bins, do so
  if(divisor_bits <= 0 && log_range < MAX_SPLITS)
    return 0;
  //otherwise divide the data into an optimized number of pieces
  divisor_bits += LOG_MEAN_BIN_SIZE;
  if(divisor_bits < 0)
    divisor_bits = 0;
  //Cannot exceed MAX_SPLITS or cache misses slow down bin lookups dramatically
  if((log_range - divisor_bits) > MAX_SPLITS)
    divisor_bits = log_range - MAX_SPLITS;
  return divisor_bits;
}
//Prepares the scratch storage for one pass that uses bin_count bins.
//Zeroes the first bin_count counters of bin_sizes and makes sure bin_cache has room for the slots
//[cache_offset, cache_offset + bin_count); neither vector is ever shrunk.
//cache_end receives cache_offset + bin_count. Returns a pointer to the first of this pass's slots,
//which stays valid only until bin_cache is resized again.
template <class RandomAccessIter>
inline RandomAccessIter *
size_bins(std::vector<size_t> &bin_sizes, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count)
{
  //Assure space for the size of each bin, followed by initializing sizes
  if(bin_sizes.size() < bin_count)
    bin_sizes.resize(bin_count);
  std::fill_n(bin_sizes.begin(), bin_count, size_t(0));
  //Make sure there is space for the bins
  cache_end = cache_offset + bin_count;
  if(bin_cache.size() < cache_end)
    bin_cache.resize(cache_end);
  return &bin_cache[cache_offset];
}
//Implementation for recursive integer sorting
//Sorts [first, last) in place: elements are distributed into bins keyed by (value >> log_divisor), then
//each bin holding at least two elements is finished by std::sort or by a recursive call.
//div_type is the type of a shifted key; data_type is the type of the temporary used while swapping.
//bin_cache holds the bin boundaries of every active recursion level; this call owns the slots
//[cache_offset, cache_end) and hands cache_end to its recursive calls as their offset.
//bin_sizes is scratch space for the per-bin element counts and is overwritten by every call.
template <class RandomAccessIter, class div_type, class data_type>
inline void
spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
, std::vector<size_t> &bin_sizes)
{
//This step is roughly 10% of runtime, but it helps avoid worst-case behavior and improve behavior with real data
//If you know the maximum and minimum ahead of time, you can pass those values in and skip this step for the first iteration
RandomAccessIter max, min;
find_extremes(first, last, max, min);
//max and min will be the same (the first item) iff all values are equivalent
if(max == min)
return;
RandomAccessIter * target_bin;
//The shift is chosen from the element count and the log2 of the spread between the extremes
unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(*max >> 0) - (*min >> 0)));
//Bin index of an element is (element >> log_divisor) - div_min, so the minimum lands in bin 0
div_type div_min = *min >> log_divisor;
div_type div_max = *max >> log_divisor;
unsigned bin_count = div_max - div_min + 1;
unsigned cache_end;
//bins points into bin_cache; it must not be used once a recursive call may have resized that vector
RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
//Calculating the size of each bin; this takes roughly 10% of runtime
for (RandomAccessIter current = first; current != last;)
bin_sizes[(*(current++) >> log_divisor) - div_min]++;
//Assign the bin positions
//bins[u] starts at the first slot of bin u and is advanced as elements are placed into that bin
bins[0] = first;
for(unsigned u = 0; u < bin_count - 1; u++)
bins[u + 1] = bins[u] + bin_sizes[u];
//Swap into place
//This dominates runtime, mostly in the swap and bin lookups
//The last bin gets no pass of its own: once every other bin holds only its own elements, so does the last
RandomAccessIter nextbinstart = first;
for(unsigned u = 0; u < bin_count - 1; ++u) {
RandomAccessIter * local_bin = bins + u;
//nextbinstart now marks the end of bin u
nextbinstart += bin_sizes[u];
//Iterating over each element in this bin
for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
//Swapping elements in current into place until the correct element has been swapped in
for(target_bin = (bins + ((*current >> log_divisor) - div_min)); target_bin != local_bin;
target_bin = bins + ((*current >> log_divisor) - div_min)) {
//3-way swap; this is about 1% faster than a 2-way swap with integers
//The main advantage is less copies are involved per item put in the correct place
data_type tmp;
//b is the next unfilled slot of the bin *current belongs to; claim it
RandomAccessIter b = (*target_bin)++;
//b_bin is the bin that the element currently sitting in slot b belongs to
RandomAccessIter * b_bin = bins + ((*b >> log_divisor) - div_min);
if (b_bin != local_bin) {
//The displaced element belongs in a third bin: drop it into that bin's next slot c and carry c's old content instead
RandomAccessIter c = (*b_bin)++;
tmp = *c;
*c = *b;
}
else
tmp = *b;
//*current goes to its final bin; whatever was carried in tmp is examined on the next iteration
*b = *current;
*current = tmp;
}
}
//Bin u is complete; from here on bins[u] records the end of bin u
*local_bin = nextbinstart;
}
bins[bin_count - 1] = last;
//If we've bucketsorted, the array is sorted and we should skip recursion
if(!log_divisor)
return;
//Recursing; log_divisor is the remaining range
size_t max_count = get_max_count(log_divisor, last - first);
//Each slot of bin_cache in [cache_offset, cache_end) now holds the end of its bin, so a bin spans [lastPos, bin_cache[u])
//The slots are read by index because a recursive call can resize bin_cache through size_bins
RandomAccessIter lastPos = first;
for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
size_t count = bin_cache[u] - lastPos;
//don't sort unless there are at least two items to compare
if(count < 2)
continue;
//using std::sort if its worst-case is better
if(count < max_count)
std::sort(lastPos, bin_cache[u]);
else
spread_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
}
}
//Generic bitshift-based 3-way swapping code
template <class RandomAccessIter, class div_type, class data_type, class right_shift>
inline void inner_swap_loop(RandomAccessIter * bins, const RandomAccessIter & nextbinstart, unsigned ii, right_shift &shift
, const unsigned log_divisor, const div_type div_min)
{
RandomAccessIter * local_bin = bins + ii;
for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
for(RandomAccessIter * target_bin = (bins + (shift(*current, log_divisor) - div_min)); target_bin != local_bin;
target_bin = bins + (shift(*current, log_divisor) - div_min)) {
data_type tmp;
RandomAccessIter b = (*target_bin)++;
RandomAccessIter * b_bin = bins + (shift(*b, log_divisor) - div_min);
//Three-way swap; if the item to be swapped doesn't belong in the current bin, swap it to where it belongs
if (b_bin != local_bin) {
RandomAccessIter c = (*b_bin)++;
tmp = *c;
*c = *b;
}
//Note: we could increment current once the swap is done in this case, but that seems to impair performance
else
tmp = *b;
*b = *current;
*current = tmp;
}
}
*local_bin = nextbinstart;
}
//Standard swapping wrapper for ascending values
//Moves nextbinstart forward by the size of bin ii, so that it marks the end of that bin,
//and then lets inner_swap_loop fill the bin.
template <class RandomAccessIter, class div_type, class data_type, class right_shift>
inline void swap_loop(RandomAccessIter * bins, RandomAccessIter & nextbinstart, unsigned ii, right_shift &shift
, const std::vector<size_t> &bin_sizes, const unsigned log_divisor, const div_type div_min)
{
  const size_t bin_length = bin_sizes[ii];
  nextbinstart += bin_length;
  inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(
      bins, nextbinstart, ii, shift, log_divisor, div_min);
}
//Functor implementation for recursive sorting
//Same scheme as the plain integer version, but keys are obtained with shift(value, bits) and ordering
//decisions (extremes, fallback std::sort) use comp.
//bin_cache slots [cache_offset, cache_end) belong to this call; recursive calls start at cache_end.
template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
inline void
spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
{
  RandomAccessIter max, min;
  find_extremes(first, last, max, min, comp);
  //Identical extremes mean every element is equivalent: nothing to do
  if(max == min)
    return;

  unsigned log_divisor = get_log_divisor(last - first,
      rough_log_2_size((size_t)(shift(*max, 0)) - (shift(*min, 0))));
  div_type div_min = shift(*min, log_divisor);
  div_type div_max = shift(*max, log_divisor);
  unsigned bin_count = div_max - div_min + 1;
  unsigned cache_end;
  RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);

  //Count how many elements fall into each bin
  for(RandomAccessIter it = first; it != last; ++it)
    ++bin_sizes[shift(*it, log_divisor) - div_min];

  //Turn the counts into the starting position of every bin
  const unsigned last_bin = bin_count - 1;
  bins[0] = first;
  for(unsigned b = 0; b < last_bin; ++b)
    bins[b + 1] = bins[b] + bin_sizes[b];

  //Swap into place; the last bin is in order once all the others are
  RandomAccessIter bin_end = first;
  for(unsigned b = 0; b < last_bin; ++b)
    swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, bin_end, b, shift, bin_sizes, log_divisor, div_min);
  bins[last_bin] = last;

  //A shift of zero means every bin holds a single key value, so the range is already sorted
  if(log_divisor == 0)
    return;

  //Recursing; bin_cache is indexed directly because a recursive call may resize it
  const size_t max_count = get_max_count(log_divisor, last - first);
  RandomAccessIter bin_first = first;
  for(unsigned idx = cache_offset; idx < cache_end; ++idx) {
    const RandomAccessIter bin_last = bin_cache[idx];
    const size_t count = bin_last - bin_first;
    //Bins with fewer than two items need no work; small ones go to std::sort
    if(count >= 2) {
      if(count < max_count)
        std::sort(bin_first, bin_last, comp);
      else
        spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(bin_first, bin_last, bin_cache, cache_end, bin_sizes, shift, comp);
    }
    bin_first = bin_last;
  }
}
//Functor implementation for recursive sorting with only Shift overridden
//Keys are obtained with shift(value, bits); ordering decisions (extremes, fallback std::sort) use operator<.
//bin_cache slots [cache_offset, cache_end) belong to this call; recursive calls start at cache_end.
template <class RandomAccessIter, class div_type, class data_type, class right_shift>
inline void
spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
, std::vector<size_t> &bin_sizes, right_shift shift)
{
  RandomAccessIter max, min;
  find_extremes(first, last, max, min);
  //Identical extremes mean every element is equivalent: nothing to do
  if(max == min)
    return;

  unsigned log_divisor = get_log_divisor(last - first,
      rough_log_2_size((size_t)(shift(*max, 0)) - (shift(*min, 0))));
  div_type div_min = shift(*min, log_divisor);
  div_type div_max = shift(*max, log_divisor);
  unsigned bin_count = div_max - div_min + 1;
  unsigned cache_end;
  RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);

  //Count how many elements fall into each bin
  for(RandomAccessIter it = first; it != last; ++it)
    ++bin_sizes[shift(*it, log_divisor) - div_min];

  //Turn the counts into the starting position of every bin
  const unsigned last_bin = bin_count - 1;
  bins[0] = first;
  for(unsigned b = 0; b < last_bin; ++b)
    bins[b + 1] = bins[b] + bin_sizes[b];

  //Swap into place; the last bin is in order once all the others are
  RandomAccessIter bin_end = first;
  for(unsigned b = 0; b < last_bin; ++b)
    swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, bin_end, b, shift, bin_sizes, log_divisor, div_min);
  bins[last_bin] = last;

  //A shift of zero means every bin holds a single key value, so the range is already sorted
  if(log_divisor == 0)
    return;

  //Recursing; bin_cache is indexed directly because a recursive call may resize it
  const size_t max_count = get_max_count(log_divisor, last - first);
  RandomAccessIter bin_first = first;
  for(unsigned idx = cache_offset; idx < cache_end; ++idx) {
    const RandomAccessIter bin_last = bin_cache[idx];
    const size_t count = bin_last - bin_first;
    //Bins with fewer than two items need no work; small ones go to std::sort
    if(count >= 2) {
      if(count < max_count)
        std::sort(bin_first, bin_last);
      else
        spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(bin_first, bin_last, bin_cache, cache_end, bin_sizes, shift);
    }
    bin_first = bin_last;
  }
}
//Holds the bin vector and makes the initial recursive call
//The two unnamed parameters are never read; their types alone select div_type and data_type.
template <class RandomAccessIter, class div_type, class data_type>
inline void
spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type)
{
  //Scratch storage shared by every level of the recursion
  std::vector<RandomAccessIter> bin_boundaries;
  std::vector<size_t> bin_counts;
  //The top-level call owns the bin slots starting at offset 0
  const unsigned root_offset = 0;
  spread_sort_rec<RandomAccessIter, div_type, data_type>(
      first, last, bin_boundaries, root_offset, bin_counts);
}
//Holds the bin vectors and makes the initial recursive call for the shift-and-compare functor variant
//The two unnamed parameters are never read; their types alone select div_type and data_type.
template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
inline void
spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift, compare comp)
{
  //Scratch storage shared by every level of the recursion
  std::vector<RandomAccessIter> bin_boundaries;
  std::vector<size_t> bin_counts;
  //The top-level call owns the bin slots starting at offset 0
  const unsigned root_offset = 0;
  spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(
      first, last, bin_boundaries, root_offset, bin_counts, shift, comp);
}
//Holds the bin vectors and makes the initial recursive call for the shift-functor-only variant
//The two unnamed parameters are never read; their types alone select div_type and data_type.
template <class RandomAccessIter, class div_type, class data_type, class right_shift>
inline void
spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift)
{
  //Scratch storage shared by every level of the recursion
  std::vector<RandomAccessIter> bin_boundaries;
  std::vector<size_t> bin_counts;
  //The top-level call owns the bin slots starting at offset 0
  const unsigned root_offset = 0;
  spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(
      first, last, bin_boundaries, root_offset, bin_counts, shift);
}
}
//Top-level sorting call for integers
//Sorts [first, last) in ascending order; the element type must support operator>> and operator<.
template <class RandomAccessIter>
inline void integer_sort(RandomAccessIter first, RandomAccessIter last)
{
  //Fewer than two elements are already sorted. Returning here also keeps *first, which is only
  //used to deduce the key and element types, from being evaluated on an empty range.
  if(last - first < 2)
    return;
  detail::spread_sort(first, last, *first >> 0, *first);
}
//integer_sort with functors
//shift(value, bits) supplies the integer key of a value shifted right by bits;
//comp supplies the ordering, used in place of operator<.
template <class RandomAccessIter, class right_shift, class compare>
inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
right_shift shift, compare comp) {
  //Fewer than two elements are already sorted. Returning here also keeps *first, which is only
  //used to deduce the key and element types, from being evaluated on an empty range.
  if(last - first < 2)
    return;
  detail::spread_sort(first, last, shift(*first, 0), *first, shift, comp);
}
//integer_sort with right_shift functor
//shift(value, bits) supplies the integer key of a value shifted right by bits; ordering uses operator<.
template <class RandomAccessIter, class right_shift>
inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
right_shift shift) {
  //Fewer than two elements are already sorted. Returning here also keeps *first, which is only
  //used to deduce the key and element types, from being evaluated on an empty range.
  if(last - first < 2)
    return;
  detail::spread_sort(first, last, shift(*first, 0), *first, shift);
}
}
#endif
#include <fstream>
using namespace std;
#define DIM 500005
//Reads a count N followed by N integers from algsort.in, sorts them in ascending order and
//writes them to algsort.out, each value followed by a single space.
int main ()
{
  std::ifstream fin ("algsort.in");
  std::ofstream fout ("algsort.out");
  int N = 0;
  //Nothing to sort when the count is missing, unreadable or not positive
  if(!(fin >> N) || N <= 0)
    return 0;
  //Storage is sized from the input and lives on the heap: a fixed-size local array could be
  //overrun by a larger N and is big enough to exhaust a small stack.
  std::vector<int> v(N);
  for (int i = 0; i < N; ++i)
    fin >> v[i];
  int * const data = &v[0];
  boost::integer_sort (data, data + N);
  for (int i = 0; i < N; ++i)
    fout << v[i] << " ";
  return 0;
}