Description
One measure of ``unsortedness'' in a sequence is the number of pairs of entries that are out of order with respect to each other. For instance, in the letter sequence ``DAABEC'', this measure is 5, since D is greater than four letters to its right and E is greater than one letter to its right. This measure is called the number of inversions in the sequence. The sequence ``AACEDGG'' has only one inversion (E and D)---it is nearly sorted---while the sequence ``ZWQM'' has 6 inversions (it is as unsorted as can be---exactly the reverse of sorted).
You are responsible for cataloguing a sequence of DNA strings (sequences containing only the four letters A, C, G, and T). However, you want to catalog them, not in alphabetical order, but rather in order of ``sortedness'', from ``most sorted'' to ``least sorted''. All the strings are of the same length.
You are responsible for cataloguing a sequence of DNA strings (sequences containing only the four letters A, C, G, and T). However, you want to catalog them, not in alphabetical order, but rather in order of ``sortedness'', from ``most sorted'' to ``least sorted''. All the strings are of the same length.
Input
The first line contains two integers: a positive integer n (0 < n <= 50) giving the length of the strings; and a positive integer m (0 < m <= 100) giving the number of strings. These are followed by m lines, each containing a string of length n.
Output
Output the list of input strings, arranged from ``most sorted'' to ``least sorted''.
If two strings are equally sorted, output them in their original input order.
Sample Input
10 6 AACATGAAGG TTTTGGCCAA TTTGGCCAAA GATCAGATTT CCCGGGGGGA ATCGATGCAT
Sample Output
CCCGGGGGGA AACATGAAGG GATCAGATTT ATCGATGCAT TTTTGGCCAA TTTGGCCAAA
这个题主要考察求解逆序数的算法,另外有一个细节需要注意(题目描述中红色加粗表示),该细节表示最后的排序需要是稳定排序。
最开始拿到该题求逆序数直接暴力求解,两个循环,并且未考虑稳定排序的隐藏条件,代码如下:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* One input string together with its "unsortedness" score. */
struct info
{
    int m_score;   /* number of inversions counted for the string */
    char *m_pstr;  /* the string itself (set by main to a malloc'd buffer) */
};

/*
 * qsort-style comparator: orders two struct info records by ascending
 * m_score.
 *
 * Returns a negative value, zero, or a positive value when *a scores lower
 * than, equal to, or higher than *b.  The result is built from comparisons
 * instead of a subtraction so that it cannot overflow for extreme scores.
 */
int comp(const void *a, const void *b)
{
    const struct info *lhs = a;
    const struct info *rhs = b;

    return (lhs->m_score > rhs->m_score) - (lhs->m_score < rhs->m_score);
}
int main()
{
int len, n, i, j, prior;
scanf("%d%d", &len, &n);
struct info *infos = calloc(n, sizeof(struct info));
for (i=0; i<n; ++i)
{
infos[i].m_score = 0;
infos[i].m_pstr = NULL;
}
char *str = NULL;
int *unsortednessPerChar = malloc(len * sizeof(int));
int k = 0;
while (k < n)
{
str = malloc((len + 1) * sizeof(char));
scanf("%s", str);
infos[k].m_pstr = str;
memset(unsortednessPerChar, 0, len * sizeof(int));
for (i=0; i<len-1; ++i)
{
for (j=i+1; j<len; ++j)
{
if (str[i] > str[j])
++unsortednessPerChar[i];
}
infos[k].m_score += unsortednessPerChar[i];
}
++k;
}
qsort(infos, n, sizeof(struct info), comp);
for (i=0; i<n; ++i)
{
printf("%s\n", infos[i].m_pstr);
}
}
做完一两天后回过头来考虑优化,关于求逆序数可以采用分治策略,归并的思路完成,并注意到了隐藏稳定排序的条件,具体代码如下:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One input string, its inversion count, and its position in the input. */
struct info
{
    int m_index;     /* position of the string in the input; breaks ties */
    int m_score;     /* number of inversions counted for the string */
    char m_str[60];  /* copy of the string, NUL-terminated */
}infos[100];

/*
 * qsort-style comparator: orders records by ascending m_score and, for equal
 * scores, by ascending m_index.
 *
 * qsort is not a stable sort, but the output must keep equally sorted strings
 * in input order; comparing the stored index on ties provides that.
 * The result is built from comparisons rather than subtraction so that it
 * cannot overflow.
 */
int comp(const void *a, const void *b)
{
    const struct info *lhs = a;
    const struct info *rhs = b;

    if (lhs->m_score != rhs->m_score)
    {
        return (lhs->m_score > rhs->m_score) - (lhs->m_score < rhs->m_score);
    }
    return (lhs->m_index > rhs->m_index) - (lhs->m_index < rhs->m_index);
}
int gCount = 0; /* running inversion total; Merge only ever adds to it */

/*
 * Merge the two adjacent sorted runs data[low..m] and data[m+1..high] into a
 * single sorted run stored back in data[low..high], adding the number of
 * inversions between the two runs to gCount.
 *
 * Equal characters are not inversions, so on a tie the left-run element is
 * taken first.
 *
 * A range with fewer than two elements is left untouched.  If the scratch
 * buffer cannot be allocated, the merge is done in place (slower, same
 * result and same count) instead of dereferencing a null pointer.
 */
void Merge(char data[], int low, int m, int high)
{
    if (low >= high)
    {
        return;
    }

    size_t total = (size_t)(high - low) + 1;
    char *temp = malloc(total);
    if (temp == NULL)
    {
        /*
         * No scratch memory: insert each right-run element into the sorted
         * prefix.  Only left-run elements can be strictly greater than it,
         * so every shift corresponds to exactly one inversion.
         */
        for (int r = m + 1; r <= high; ++r)
        {
            char value = data[r];
            int pos = r;
            while (pos > low && data[pos - 1] > value)
            {
                data[pos] = data[pos - 1];
                --pos;
                ++gCount;
            }
            data[pos] = value;
        }
        return;
    }

    int left = low;
    int right = m + 1;
    size_t out = 0;
    while (left <= m && right <= high)
    {
        /* <= on purpose: equal elements do not form an inversion. */
        if (data[left] <= data[right])
        {
            temp[out++] = data[left++];
        }
        else
        {
            temp[out++] = data[right++];
            /* The element just taken is smaller than all of data[left..m]. */
            gCount += m - left + 1;
        }
    }
    while (left <= m)
    {
        temp[out++] = data[left++];
    }
    while (right <= high)
    {
        temp[out++] = data[right++];
    }

    memcpy(&data[low], temp, total);
    free(temp);
}
/*
 * Sort data[low..high] with a top-down merge sort.  Each merge step is done
 * by Merge, which adds the inversions it finds to gCount, so after the call
 * gCount has grown by the number of inversions in the original range.
 *
 * The range is sorted in place.  A range with fewer than two elements,
 * including an empty one where high < low, is left untouched; testing only
 * low == high would recurse without end on an empty range.
 */
void MergeCount(char data[], int low, int high)
{
    if (low >= high)
    {
        return;
    }

    /* Same midpoint as (low + high) / 2, without the risk of overflow. */
    int mid = low + (high - low) / 2;

    MergeCount(data, low, mid);
    MergeCount(data, mid + 1, high);
    Merge(data, low, mid, high);
}
int main()
{
int len, n;
scanf("%d%d", &len, &n);
char str[60];
int i = 0;
while (i < n)
{
scanf("%s", str);
strcpy(infos[i].m_str, str);
infos[i].m_index = i;
gCount = 0;
MergeCount(str, 0, len - 1);
infos[i].m_score = gCount;
++i;
}
qsort(infos, n, sizeof(struct info), comp);
for (i=0; i<n; ++i)
printf("%s\n", infos[i].m_str);
return 0;
}
归并算法计算逆序数时,需要注意两数相等不算逆序数,使用<=符号条件:
// Note: use <= so that two equal elements are not counted as an inversion.
if (data[i] <= data[j])
{
temp[k++] = data[i++];
}
else
{
temp[k++] = data[j++];
// The element just taken from the right run is smaller than every
// remaining left-run element data[i..m], so it adds m - i + 1 inversions.
gCount += m - i + 1;
}
暴力解法的时间复杂度为O(n^2),归并解法的时间复杂度为O(n*logn),但是本题的数据量很小,并不能看出归并解法的速度优势。