【POJ】1007 DNA Sorting

最新推荐文章于 2018-05-02 17:53:23 发布

置顶 MrKnight

最新推荐文章于 2018-05-02 17:53:23 发布

阅读量783

点赞数

分类专栏： POJ 编程学习笔记文章标签： POJ 1007 DNA Sorting 逆序数

本文链接：https://blog.csdn.net/MrKnight/article/details/8820882

版权

编程学习笔记同时被 2 个专栏收录

15 篇文章 0 订阅

订阅专栏

POJ

5 篇文章 0 订阅

订阅专栏

Description

One measure of ``unsortedness'' in a sequence is the number of pairs of entries that are out of order with respect to each other. For instance, in the letter sequence ``DAABEC'', this measure is 5, since D is greater than four letters to its right and E is greater than one letter to its right. This measure is called the number of inversions in the sequence. The sequence ``AACEDGG'' has only one inversion (E and D)---it is nearly sorted---while the sequence ``ZWQM'' has 6 inversions (it is as unsorted as can be---exactly the reverse of sorted).

You are responsible for cataloguing a sequence of DNA strings (sequences containing only the four letters A, C, G, and T). However, you want to catalog them, not in alphabetical order, but rather in order of ``sortedness'', from ``most sorted'' to ``least sorted''. All the strings are of the same length.

Input

The first line contains two integers: a positive integer n (0 < n <= 50) giving the length of the strings; and a positive integer m (0 < m <= 100) giving the number of strings. These are followed by m lines, each containing a string of length n.

Output

Output the list of input strings, arranged from ``most sorted'' to ``least sorted''. Since two strings can be equally sorted, then output them according to the original order.

Sample Input

10 6
AACATGAAGG
TTTTGGCCAA
TTTGGCCAAA
GATCAGATTT
CCCGGGGGGA
ATCGATGCAT

Sample Output

CCCGGGGGGA
AACATGAAGG
GATCAGATTT
ATCGATGCAT
TTTTGGCCAA
TTTGGCCAAA

这个题主要考察求解逆序数的算法，另外有一个细节需要注意：题目 Output 部分要求逆序数相同的字符串按原始输入顺序输出（原文中以红色加粗标出），这意味着最后的排序必须是稳定排序。

最开始拿到该题求逆序数直接暴力求解，两个循环，并且未考虑稳定排序的隐藏条件，代码如下：

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/* One input string paired with its unsortedness score (brute-force version). */
struct info
{
	int m_score;	/* inversion count of the string; accumulated in main */
	char *m_pstr;	/* heap buffer holding the string; allocated in main */
};
int comp(const void *a, const void *b)
{
	return ((struct info *)a)->m_score - ((struct info *)b)->m_score;
}
/*
 * Brute-force solution.
 *
 * Reads the string length and the number of strings from stdin, then the
 * strings themselves. Each string is scored by its inversion count (the
 * number of pairs i < j with str[i] > str[j]) and the strings are printed
 * in ascending score order, one per line.
 *
 * Returns 0 on success and 1 on malformed input or allocation failure.
 *
 * NOTE: ordering is delegated to qsort with comp, which compares scores
 * only. qsort does not guarantee a stable sort, so strings with equal
 * scores are not guaranteed to keep their input order.
 */
int main()
{
	int len, n, i, j;
	int count = 0;		/* number of strings actually read */
	int status = 1;
	struct info *infos = NULL;
	char fmt[16];

	if (scanf("%d%d", &len, &n) != 2 || len <= 0 || n < 0)
	{
		fprintf(stderr, "expected a positive string length and a non-negative string count\n");
		return 1;
	}
	if (n == 0)
		return 0;

	infos = calloc((size_t)n, sizeof *infos);
	if (infos == NULL)
	{
		fprintf(stderr, "out of memory\n");
		return 1;
	}
	for (i = 0; i < n; ++i)
	{
		infos[i].m_score = 0;
		infos[i].m_pstr = NULL;
	}

	/* Build "%<len>s" so scanf can never write more than len characters
	   plus the terminator into a buffer of len + 1 bytes. An over-long
	   token is split and its remainder is read as the next string. */
	snprintf(fmt, sizeof fmt, "%%%ds", len);

	while (count < n)
	{
		char *str = malloc((size_t)len + 1);
		if (str == NULL)
		{
			fprintf(stderr, "out of memory\n");
			goto cleanup;
		}
		if (scanf(fmt, str) != 1)
		{
			/* Input ended early: keep what was read so far. */
			free(str);
			break;
		}
		infos[count].m_pstr = str;

		/* Count over the characters actually read, so a string shorter
		   than len never causes reads of uninitialised bytes. */
		int actual = (int)strlen(str);
		for (i = 0; i < actual - 1; ++i)
		{
			for (j = i + 1; j < actual; ++j)
			{
				if (str[i] > str[j])
					++infos[count].m_score;
			}
		}

		++count;
	}

	qsort(infos, (size_t)count, sizeof *infos, comp);

	for (i = 0; i < count; ++i)
	{
		printf("%s\n", infos[i].m_pstr);
	}
	status = 0;

cleanup:
	for (i = 0; i < count; ++i)
		free(infos[i].m_pstr);
	free(infos);
	return status;
}

做完一两天后回过头来考虑优化：求逆序数可以采用分治策略，按归并排序的思路完成；同时也注意到了稳定排序这一隐藏条件。具体代码如下：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Record for one input string (merge-based version).
 * The file-scope array infos provides storage for at most 100 records.
 */
struct info
{
	int m_index;	/* position of the string in the input; tie-breaker in comp */
	int m_score;	/* inversion count of the string */
	char m_str[60];	/* copy of the string as it was read */
}infos[100];
int comp(const void *a, const void *b)
{
	// 快排不是稳定排序，但需要保证排序稳定
	if (((struct info *)a)->m_score == ((struct info *)b)->m_score)
		return ((struct info *)a)->m_index - ((struct info *)b)->m_index;
	else
		return ((struct info *)a)->m_score - ((struct info *)b)->m_score;
}

int gCount = 0;	/* running inversion total; Merge adds to it, callers reset and read it */

/*
 * Merges the adjacent runs data[low..m] and data[m+1..high] into a single
 * ascending run stored back in data[low..high]. Both runs are expected to
 * be sorted already.
 *
 * Every time an element of the right run is placed before the remaining
 * elements of the left run, the number of those remaining elements is
 * added to gCount (one inversion per pair).
 *
 * A range of fewer than two elements is left untouched. If the temporary
 * buffer cannot be allocated the program is terminated, because the
 * function has no way to report failure to its caller.
 */
void Merge(char data[], int low, int m, int high)
{
	/* Nothing to merge; also avoids a zero or negative allocation size. */
	if (high <= low)
		return;

	size_t span = (size_t)(high - low) + 1;
	char *temp = malloc(span);
	if (temp == NULL)
	{
		fprintf(stderr, "Merge: out of memory\n");
		exit(EXIT_FAILURE);
	}

	int i = low, j = m + 1, k = 0;
	while (i <= m && j <= high)
	{
		/* <= on purpose: equal elements do not form an inversion. */
		if (data[i] <= data[j])
		{
			temp[k++] = data[i++];
		}
		else
		{
			temp[k++] = data[j++];
			/* The element just taken from the right run is smaller than
			   every remaining left element data[i..m]. */
			gCount += m - i + 1;
		}
	}

	while (i <= m)
		temp[k++] = data[i++];
	while (j <= high)
		temp[k++] = data[j++];

	memcpy(&data[low], temp, span);
	free(temp);
}
/*
 * Sorts data[low..high] in place with a top-down merge sort. The merging
 * is done by Merge, which accumulates the inversion count of the range
 * into gCount; callers reset gCount before the call and read it after.
 *
 * Ranges holding zero or one element (low >= high) are left untouched.
 */
void MergeCount(char data[], int low, int high)
{
	/* >= rather than ==: an empty range (low > high) must also stop here,
	   otherwise the right-hand recursive call never terminates. */
	if (low >= high)
		return;

	/* Midpoint written so that low + high cannot overflow. */
	int m = low + (high - low) / 2;
	MergeCount(data, low, m);
	MergeCount(data, m + 1, high);
	Merge(data, low, m, high);
}

/*
 * Merge-based solution.
 *
 * Reads the string length and the number of strings from stdin, then the
 * strings themselves. Each string is stored in infos together with its
 * input position and its inversion count, the records are sorted with
 * comp, and the strings are printed one per line.
 *
 * Returns 0 on success and 1 when the header is missing or the string
 * count does not fit the infos array.
 */
int main()
{
	int len, n;

	if (scanf("%d%d", &len, &n) != 2)
	{
		fprintf(stderr, "expected the string length and the number of strings\n");
		return 1;
	}

	/* infos has a fixed size, so the count must fit it; a negative count
	   would also become a huge size_t in the qsort call below. */
	const int capacity = (int)(sizeof infos / sizeof infos[0]);
	if (n < 0 || n > capacity)
	{
		fprintf(stderr, "string count %d is outside 0..%d\n", n, capacity);
		return 1;
	}

	char str[60];

	int i = 0;
	/* %59s leaves room for the terminator in str[60]; stop early if the
	   input runs out instead of reusing the previous buffer contents. */
	while (i < n && scanf("%59s", str) == 1)
	{
		/* Keep a copy first: MergeCount sorts str in place. */
		snprintf(infos[i].m_str, sizeof infos[i].m_str, "%s", str);
		infos[i].m_index = i;

		/* Count over the first len characters, but never past the end
		   of what was actually read. */
		int span = (int)strlen(str);
		if (len < span)
			span = len;

		gCount = 0;
		if (span > 1)
			MergeCount(str, 0, span - 1);
		infos[i].m_score = gCount;

		++i;
	}
	n = i;	/* only the records that were really filled in */

	qsort(infos, (size_t)n, sizeof(struct info), comp);

	for (i=0; i<n; ++i)
		printf("%s\n", infos[i].m_str);

	return 0;
}

归并算法计算逆序数时，需要注意两数相等不算逆序数，因此比较时要使用 <=（相等时先取左半部分的元素）：

// note: <= is deliberate (equal elements do not count as an inversion)
		if (data[i] <= data[j])
		{
			temp[k++] = data[i++];
		}
		else
		{
			temp[k++] = data[j++];
			// the element just taken from the right run is smaller than data[i]..data[m]
			gCount += m - i + 1;
		}

对单个长度为 n 的字符串，暴力解法求逆序数的时间复杂度为 O(n^2)，归并解法的时间复杂度为 O(n*logn)；但是本题的数据量很小（n <= 50），并不能看出归并解法的速度优势。

MrKnight

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
【POJ】1007 DNA Sorting

DescriptionOne measure of ``unsortedness'' in a sequence is the number of pairs of entries that are out of order with respect to each other. For instance, in the letter sequence ``DAABEC'', this m
复制链接

扫一扫

专栏目录