【POJ】1007 DNA Sorting

Description

One measure of ``unsortedness'' in a sequence is the number of pairs of entries that are out of order with respect to each other. For instance, in the letter sequence ``DAABEC'', this measure is 5, since D is greater than four letters to its right and E is greater than one letter to its right. This measure is called the number of inversions in the sequence. The sequence ``AACEDGG'' has only one inversion (E and D)---it is nearly sorted---while the sequence ``ZWQM'' has 6 inversions (it is as unsorted as can be---exactly the reverse of sorted). 

You are responsible for cataloguing a sequence of DNA strings (sequences containing only the four letters A, C, G, and T). However, you want to catalog them, not in alphabetical order, but rather in order of ``sortedness'', from ``most sorted'' to ``least sorted''. All the strings are of the same length. 

Input

The first line contains two integers: a positive integer n (0 < n <= 50) giving the length of the strings; and a positive integer m (0 < m <= 100) giving the number of strings. These are followed by m lines, each containing a string of length n.

Output

Output the list of input strings, arranged from ``most sorted'' to ``least sorted''. Since two strings can be equally sorted, then output them according to the original order.

Sample Input

10 6
AACATGAAGG
TTTTGGCCAA
TTTGGCCAAA
GATCAGATTT
CCCGGGGGGA
ATCGATGCAT

Sample Output

CCCGGGGGGA
AACATGAAGG
GATCAGATTT
ATCGATGCAT
TTTTGGCCAA
TTTGGCCAAA
 

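这个题主要考察求解逆序数的算法。另外有一个细节需要注意(原题描述中以红色加粗标出的那句话:“Since two strings can be equally sorted, then output them according to the original order”):逆序数相同的字符串必须保持输入时的先后顺序,也就是说最后的排序必须是稳定排序。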

最开始拿到该题时,直接用两重循环暴力求逆序数,并且没有考虑到必须稳定排序这一隐藏条件(qsort 并不保证稳定),代码如下:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/* One input string together with its inversion count. */
struct info
{
	int m_score;	/* number of inversions counted in m_pstr */
	char *m_pstr;	/* the input string this record describes */
};

/*
 * qsort comparator: orders records by ascending m_score.
 *
 * Returns a negative, zero or positive value when a's score is less than,
 * equal to or greater than b's. The result is computed with comparisons
 * rather than subtraction so it cannot overflow for any pair of int scores.
 *
 * Records with equal scores compare as 0; qsort makes no promise about the
 * relative order of such records, so this comparator alone does not yield a
 * stable sort.
 */
int comp(const void *a, const void *b)
{
	const struct info *lhs = a;
	const struct info *rhs = b;

	return (lhs->m_score > rhs->m_score) - (lhs->m_score < rhs->m_score);
}
/*
 * Brute-force solution.
 *
 * Reads the string length and the number of strings, then each string;
 * counts the inversions of every string with a double loop; sorts the
 * records by inversion count with qsort and prints the strings.
 *
 * Note: qsort is not guaranteed to be stable, so strings with the same
 * inversion count may be printed in an order different from the input.
 *
 * Returns 0 on success and 1 when the header is malformed or memory
 * cannot be allocated. If the input ends early, the strings read so far
 * are still sorted and printed.
 */
int main()
{
	int len, n, i, j;

	// Both header values must be present and positive.
	if (scanf("%d%d", &len, &n) != 2 || len <= 0 || n <= 0)
		return 1;

	struct info *infos = calloc((size_t)n, sizeof *infos);
	if (infos == NULL)
		return 1;

	// Build "%<len>s" so a read can never write past the len + 1 byte buffer.
	char fmt[32];
	snprintf(fmt, sizeof fmt, "%%%ds", len);

	int status = 0;
	int count = 0;	// number of records whose string was read successfully
	while (count < n)
	{
		char *str = malloc((size_t)len + 1);
		if (str == NULL)
		{
			status = 1;
			break;
		}
		if (scanf(fmt, str) != 1)
		{
			// Input ended early: keep what has been read so far.
			free(str);
			break;
		}
		infos[count].m_pstr = str;
		infos[count].m_score = 0;

		// Only look at characters that were actually read (at most len).
		int span = (int)strlen(str);
		for (i=0; i<span-1; ++i)
		{
			for (j=i+1; j<span; ++j)
			{
				// every later, smaller character is one inversion
				if (str[i] > str[j])
					++infos[count].m_score;
			}
		}

		++count;
	}

	if (status == 0)
	{
		qsort(infos, (size_t)count, sizeof(struct info), comp);

		for (i=0; i<count; ++i)
		{
			printf("%s\n", infos[i].m_pstr);
		}
	}

	// Release every string that was stored, then the record array itself.
	for (i=0; i<count; ++i)
		free(infos[i].m_pstr);
	free(infos);

	return status;
}

做完一两天后回过头来考虑优化,关于求逆序数可以采用分治策略,归并的思路完成,并注意到了隐藏稳定排序的条件,具体代码如下:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One input string with its original position and inversion count. */
struct info
{
	int m_index;	/* position of the string in the input (0-based) */
	int m_score;	/* number of inversions counted in the string */
	char m_str[60];	/* copy of the input string */
}infos[100];

/*
 * qsort comparator: orders records by ascending m_score and, when the
 * scores are equal, by ascending m_index.
 *
 * qsort itself is not a stable sort; breaking ties on the input position
 * makes the final order the same as a stable sort by score would give.
 * Comparisons are used instead of subtraction so the result cannot
 * overflow for any pair of int values.
 */
int comp(const void *a, const void *b)
{
	const struct info *lhs = a;
	const struct info *rhs = b;

	if (lhs->m_score == rhs->m_score)
		return (lhs->m_index > rhs->m_index) - (lhs->m_index < rhs->m_index);
	else
		return (lhs->m_score > rhs->m_score) - (lhs->m_score < rhs->m_score);
}

int gCount = 0;	// running inversion total; Merge only ever adds to it

/*
 * Merges the two adjacent runs data[low..m] and data[m+1..high] (bounds
 * inclusive) into ascending order in place and adds the number of
 * inversions between the two runs to gCount.
 *
 * Both runs are expected to be sorted already, as they are when called
 * from a merge sort. If either run is empty there is nothing to merge and
 * the function returns without touching data or gCount.
 *
 * The process is terminated with EXIT_FAILURE if the scratch buffer
 * cannot be allocated.
 */
void Merge(char data[], int low, int m, int high)
{
	// An empty left or right run leaves the range unchanged.
	if (low > m || m >= high)
		return;

	size_t span = (size_t)(high - low + 1);
	char *temp = calloc(span, sizeof(char));
	if (temp == NULL)
	{
		fprintf(stderr, "Merge: out of memory\n");
		exit(EXIT_FAILURE);
	}

	int i = low, j = m + 1, k = 0;
	while (i <= m && j <= high)
	{
		// note: use <= so that equal elements are not counted as an inversion
		if (data[i] <= data[j])
		{
			temp[k++] = data[i++];
		}
		else
		{
			temp[k++] = data[j++];
			// the element just taken from the right run is smaller than
			// every element still waiting in data[i..m]
			gCount += m - i + 1;
		}
	}

	// Copy whatever is left of either run.
	while (i <= m)
		temp[k++] = data[i++];
	while (j <= high)
		temp[k++] = data[j++];

	memcpy(&data[low], temp, span * sizeof(char));
	free(temp);
}
/*
 * Merge-sorts data[low..high] (bounds inclusive) in place by splitting the
 * range in half, recursing on each half and combining them with Merge,
 * which accumulates the inversion count as a side effect.
 *
 * A range with fewer than two elements, including an empty range where
 * high < low, is already sorted and is left untouched.
 */
void MergeCount(char data[], int low, int high)
{
	// >= rather than ==: an empty range must also stop the recursion
	if (low >= high)
		return;

	// midpoint computed without forming low + high, which could overflow
	int m = low + (high - low) / 2;
	MergeCount(data, low, m);
	MergeCount(data, m + 1, high);
	Merge(data, low, m, high);
}

/*
 * Merge-sort solution.
 *
 * Reads the string length and the number of strings, then each string.
 * Every string is copied into infos together with its input position; its
 * inversion count is obtained by merge-sorting the scratch copy with
 * MergeCount and reading gCount afterwards. The records are then sorted
 * with qsort and printed.
 *
 * Returns 1 when the header is missing or its values do not fit the fixed
 * buffers (the length must fit in str, the count must fit in infos). If
 * the input ends early, the strings read so far are still sorted and
 * printed.
 */
int main()
{
	int len, n;

	if (scanf("%d%d", &len, &n) != 2)
		return 1;

	char str[60];

	// The header must fit the fixed-size buffers used below.
	if (len <= 0 || len >= (int)sizeof str)
		return 1;
	if (n <= 0 || n > (int)(sizeof infos / sizeof infos[0]))
		return 1;

	int i = 0;
	while (i < n)
	{
		// width 59 == sizeof str - 1, so the read cannot overflow str
		if (scanf("%59s", str) != 1)
			break;
		strcpy(infos[i].m_str, str);
		infos[i].m_index = i;

		// Never count past the characters that were actually read.
		int span = (int)strlen(str);
		if (span > len)
			span = len;

		gCount = 0;
		MergeCount(str, 0, span - 1);
		infos[i].m_score = gCount;

		++i;
	}
	n = i;	// number of strings actually read

	qsort(infos, (size_t)n, sizeof(struct info), comp);

	for (i=0; i<n; ++i)
		printf("%s\n", infos[i].m_str);

	return 0;
}

用归并算法计算逆序数时需要注意:两个元素相等不算逆序,因此合并时的比较条件必须使用 <=(相等时先取左半部分的元素,不计入逆序数):

// note: use <= (equal elements do not count as an inversion)
		if (data[i] <= data[j])
		{
			temp[k++] = data[i++];
		}
		else
		{
			temp[k++] = data[j++];
			// the element just taken from the right run is smaller than every element in data[i..m]
			gCount += m - i + 1;
		}

对单个长度为 n 的字符串,暴力解法求逆序数的时间复杂度为 O(n^2),归并解法的时间复杂度为 O(n log n);但是本题 n 不超过 50,数据量很小,并不能看出归并解法的速度优势。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值