namespace pratique
{
    namespace details
    {
        /// Ranges holding at most this many strings are finished with insertion sort
        /// instead of another counting pass.
        const size_t CUTOFF = 15;

        /// Three-way comparison of two NUL-terminated strings that skips their first
        /// @p d characters.
        ///
        /// Characters are compared as unsigned char. That is the order msd_sort() uses
        /// to index its buckets, so the insertion-sort path and the counting path agree
        /// on bytes above 0x7F regardless of whether plain char is signed.
        ///
        /// @param s1 first string; must be at least @p d characters long
        /// @param s2 second string; must be at least @p d characters long
        /// @param d  number of leading characters to skip in both strings
        /// @return -1, 0 or 1 when the tail of @p s1 orders before, equal to or after
        ///         the tail of @p s2
        int compare(const char * s1, const char * s2, size_t d)
        {
            s1 += d;
            s2 += d;
            // Advance over the common prefix; stops at the first difference or at the
            // end of s1 (which is also the end of s2 if the characters are still equal).
            while (*s1 != '\0' && *s1 == *s2) {
                ++s1;
                ++s2;
            }
            const unsigned char c1 = static_cast<unsigned char>(*s1);
            const unsigned char c2 = static_cast<unsigned char>(*s2);
            if (c1 == c2) {
                return 0;
            }
            // A terminator (0) is smaller than any other byte, so a proper prefix
            // orders first.
            return c1 < c2 ? -1 : 1;
        }

        /// Stable insertion sort of the @p n strings starting at @p begin, ordering them
        /// by compare() with the first @p d characters ignored.
        ///
        /// @param begin first element of the range
        /// @param end   unused; the range is described by @p begin and @p n
        /// @param n     number of elements to sort
        /// @param d     number of leading characters every string is assumed to share
        void insertion_sort(std::vector<const char *>::iterator begin, std::vector<const char *>::iterator end, size_t n, size_t d)
        {
            (void)end;
            if (n < 2) {
                return; // nothing to order; also avoids forming begin - 1 when n == 0
            }
            const std::vector<const char *>::iterator stop = begin + n;
            for (std::vector<const char *>::iterator next = begin + 1; next != stop; ++next) {
                const char * const value = *next;
                std::vector<const char *>::iterator hole = next;
                // Shift strictly greater elements one slot right; equal elements stay
                // in front, which keeps the sort stable.
                while (hole != begin && compare(*(hole - 1), value, d) > 0) {
                    *hole = *(hole - 1);
                    --hole;
                }
                *hole = value;
            }
        }

        /// Recursive MSD radix sort step: orders [begin, end) by the characters at index
        /// @p d and beyond.
        ///
        /// Every string in the range must be at least @p d characters long. Ranges of at
        /// most CUTOFF strings are handled by insertion_sort(); larger ranges are split
        /// into 256 buckets by the byte at index @p d, and each bucket is sorted on the
        /// next index.
        ///
        /// @param begin,end range to sort in place
        /// @param d         index of the character to bucket on
        /// @param buffer    scratch space; grown here if it has fewer than
        ///                  (end - begin) elements
        void msd_sort(std::vector<const char *>::iterator begin, std::vector<const char *>::iterator end, size_t d, std::vector<const char *> & buffer)
        {
            const size_t n = static_cast<size_t>(end - begin);
            if (n <= CUTOFF) {
                insertion_sort(begin, end, n, d);
                return;
            }
            if (buffer.size() < n) {
                buffer.resize(n);
            }

            const size_t RADIX = 256;
            // One extra slot so that bucket c is always [bounds[c], bounds[c + 1]).
            size_t bounds[RADIX + 1] = { 0 };

            // Count the occurrences of every byte value at index d.
            for (std::vector<const char *>::iterator it = begin; it != end; ++it) {
                ++bounds[static_cast<unsigned char>((*it)[d])];
            }
            // Inclusive prefix sum: bounds[c] becomes the end offset of bucket c.
            for (size_t c = 1; c < RADIX; ++c) {
                bounds[c] += bounds[c - 1];
            }
            bounds[RADIX] = n;

            // Distribute back to front so equal keys keep their relative order; each
            // decrement walks bounds[c] down to the start offset of bucket c.
            for (std::vector<const char *>::iterator it = end; it != begin;) {
                --it;
                const size_t c = static_cast<unsigned char>((*it)[d]);
                buffer[--bounds[c]] = *it;
            }
            std::copy(buffer.begin(), buffer.begin() + n, begin);

            // Bucket 0 holds the strings that end at index d: they are all equal from
            // here on and must not be read past their terminator, so start at 1.
            for (size_t c = 1; c < RADIX; ++c) {
                const size_t lo = bounds[c];
                const size_t hi = bounds[c + 1];
                const size_t count = hi - lo;
                if (count > CUTOFF) {
                    msd_sort(begin + lo, begin + hi, d + 1, buffer);
                }
                else if (count > 1) {
                    insertion_sort(begin + lo, begin + hi, count, d + 1);
                }
            }
        }
    }

    /// Returns pointers to the C strings of @p ss in ascending byte order (the order
    /// strcmp defines), computed with an MSD radix sort.
    ///
    /// The returned pointers come from std::string::c_str() on the elements of @p ss,
    /// so they are only valid while @p ss is alive and unmodified. Strings are treated
    /// as NUL-terminated, so content after an embedded '\0' is ignored.
    std::vector<const char *> msd_sort(std::vector<std::string> const & ss)
    {
        const size_t n = ss.size();
        std::vector<const char *> ret;
        ret.reserve(n);
        for (const std::string & s : ss) {
            ret.push_back(s.c_str());
        }
        if (n > 1) {
            // details::msd_sort sizes the scratch buffer on demand, so inputs small
            // enough for insertion sort never allocate it.
            std::vector<const char *> buffer;
            details::msd_sort(ret.begin(), ret.end(), 0, buffer);
        }
        return ret; // plain return keeps copy elision / implicit move available
    }
}
The testing code is:
// Reference implementation for the tests: returns pointers to the C strings of ss,
// ordered with std::sort using strcmp as the comparison.
// The pointers come from c_str() on the elements of ss, so they stay valid only
// while ss is alive and unmodified.
std::vector<const char *> standard_sort(std::vector<std::string> const & ss)
{
    std::vector<const char *> ret;
    ret.reserve(ss.size());
    for (const std::string & s : ss) {
        ret.push_back(s.c_str());
    }
    if (ret.size() > 1) {
        std::sort(ret.begin(), ret.end(), [](const char * s1, const char * s2) { return strcmp(s1, s2) < 0; });
    }
    // Returning the local by name lets the compiler elide the copy;
    // std::move here would prevent that.
    return ret;
}
// Returns true when both vectors have the same length and every pair of
// corresponding C strings compares equal with strcmp.
// On the first mismatch the 1-based position and the two strings are printed to
// stdout and false is returned; a size mismatch returns false without printing.
bool equals(std::vector<const char *> const & vs1, std::vector<const char *> const & vs2)
{
    if (vs1.size() != vs2.size()) {
        return false;
    }
    for (size_t i = 0, e = vs1.size(); i < e; ++i) {
        if (strcmp(vs1[i], vs2[i]) != 0) {
            // %zu is the conversion for size_t; %d with a size_t argument is undefined behaviour.
            printf( "The %zuth elements are different: %s %s\n", i + 1, vs1[ i ], vs2[ i ] );
            return false;
        }
    }
    return true;
}
// Runs the MSD sort test cases: a two-value input with many duplicates, a fixed
// word list, and one randomly generated input per entry of global_primes.
// Every case compares the msd_sort result with standard_sort through equals()
// and reports a mismatch through verify().
void msd_sort_test()
{
    printf("\nMSD sort test cases are being executed......\n");
    {
        // 127 empty strings followed by 127 copies of "A": only two distinct keys,
        // one of them ending at index 0.
        std::vector<std::string> input;
        for (size_t i = 0; i < 127; ++i) {
            input.emplace_back("");
        }
        for (size_t i = 0; i < 127; ++i) {
            input.emplace_back("A");
        }
        // Check the result instead of discarding it, so this case can fail on a
        // wrong order and not only on a crash.
        auto r1 = msd_sort(input);
        auto r2 = standard_sort(input);
        verify(equals(r1, r2), stderr, "The implementation of msd_sort is wrong\n");
    }
    {
        std::vector<std::string> ss = { "she", "by", "the", "sea", "shells", "are", "seashells", "surely", "she", "sea", "short", "sells", "about", "method", "divide", "way", "key", "after", "few", "basic", "idea", "static", "final", "she", "by", "the", "sea", "shells", "are", "seashells", "surely", "she", "sea", "short", "sells", "about", "method", "divide", "way", "key", "see", "shy", "swing", "short", "shore", "shop", "sheep"};
        auto r1 = msd_sort(ss);
        auto r2 = standard_sort(ss);
        verify(equals(r1, r2), stderr, "The implementation of msd_sort is wrong\n");
    }
    for (size_t i = 0, e = number_of_primes; i < e; ++i) {
        const size_t string_length_limit = 16;
        // One extra slot so a string of string_length_limit characters still has
        // room for its terminator.
        std::vector<char> buffer(string_length_limit + 1, '\0');
        const size_t items = global_primes[i] / string_length_limit;
        std::vector<std::string> input;
        input.reserve(items);
        for (size_t j = 0; j < items; ++j) {
            const size_t ke = pratique::random(static_cast<size_t>(0), string_length_limit);
            for (size_t k = 0; k < ke; ++k) {
                buffer[k] = pratique::random('A', 'z');
            }
            buffer[ke] = '\0';
            input.emplace_back(&buffer[0]);
        }
        auto r1 = report_performance("MSD sort", msd_sort, input);
        auto r2 = report_performance("standard sort", standard_sort, input);
        verify(equals(r1, r2), stderr, "The implementation of msd_sort is wrong\n");
    }
}