POJ1007 DNA Sorting

最新推荐文章于 2021-02-24 13:48:36 发布

huanghanqian

最新推荐文章于 2021-02-24 13:48:36 发布

阅读量475

点赞数

分类专栏： POJ

本文链接：https://blog.csdn.net/huanghanqian/article/details/51534935

版权

POJ 专栏收录该内容

17 篇文章 1 订阅

订阅专栏

题目：

Description

One measure of ``unsortedness'' in a sequence is the number of pairs of entries that are out of order with respect to each other. For instance, in the letter sequence ``DAABEC'', this measure is 5, since D is greater than four letters to its right and E is greater than one letter to its right. This measure is called the number of inversions in the sequence. The sequence ``AACEDGG'' has only one inversion (E and D)---it is nearly sorted---while the sequence ``ZWQM'' has 6 inversions (it is as unsorted as can be---exactly the reverse of sorted).
You are responsible for cataloguing a sequence of DNA strings (sequences containing only the four letters A, C, G, and T). However, you want to catalog them, not in alphabetical order, but rather in order of ``sortedness'', from ``most sorted'' to ``least sorted''. All the strings are of the same length.

Input

The first line contains two integers: a positive integer n (0 < n <= 50) giving the length of the strings; and a positive integer m (0 < m <= 100) giving the number of strings. These are followed by m lines, each containing a string of length n.

Output

Output the list of input strings, arranged from ``most sorted'' to ``least sorted''. Since two strings can be equally sorted, then output them according to the original order.

Sample Input

10 6
AACATGAAGG
TTTTGGCCAA
TTTGGCCAAA
GATCAGATTT
CCCGGGGGGA
ATCGATGCAT

Sample Output

CCCGGGGGGA
AACATGAAGG
GATCAGATTT
ATCGATGCAT
TTTTGGCCAA
TTTGGCCAAA

思路：

题目的意思就是求每个DNA序列的混乱度，再将序列以混乱度从小到大的顺序打印出来。关于混乱度的定义，Description 第一段已给出。我抖了个机灵，用了冒泡排序：每发生一次交换，混乱度就加一。这样求得的混乱度与题目定义的逆序数（inversions）是相等的，因为冒泡排序每次交换的都是一对相邻的逆序元素，交换后恰好消除一个逆序对，而排序完成时逆序对为零，所以交换的总次数就等于逆序数。而且我的方法不局限于字符串只含DNA的那几个字母。

写了一次就AC了。然后发现同样的代码提交时一会儿时间是16ms，一会儿又是0ms，难道该题的测试用例还在不停的变化吗？

代码：

memory:248K, time16MS.

#include<cstring>
#include<iostream>

using namespace std;

// Reads n (string length) and m (number of strings), then the m strings, and
// prints them ordered by ascending inversion count. Strings with the same
// count keep their input order. Returns 1 if the input is malformed.
int main(){
	int n,m;
	// The buffers below hold at most 100 strings of at most 50 characters, so
	// refuse unreadable or out-of-range sizes instead of overrunning them.
	if(!(cin>>n>>m)||n<=0||n>50||m<=0||m>100)
		return 1;
	// Zero-filled so a token shorter than n never exposes uninitialised bytes.
	char DNAs[100][51]={{0}};
	char sortedDNAs[100][51]={{0}};
	int sortedIndex[100][2];// column 0: index into DNAs; column 1: inversion count
	int sortedIndexCount=-1;// index of the last occupied row of sortedIndex
	for(int i=0;i<m;i++){
		// Cap the extraction at 50 characters plus the terminator; operator>>
		// resets the width after every read, so it is set on each iteration.
		cin.width(sizeof(DNAs[i]));
		if(!(cin>>DNAs[i]))
			return 1;
		// Work on a copy so the original string is still available for output.
		memcpy(sortedDNAs[i],DNAs[i],n*sizeof(char));
	}
	// Bubble-sort each copy; every adjacent swap removes exactly one inversion,
	// so the number of swaps equals the inversion count of the string.
	for(int num=0;num<m;num++){
		int count=0;
	    for(int i=0;i<n-1;i++){
	    	bool isReverse=false;
		    for(int j=0;j<n-i-1;j++){
			    if(sortedDNAs[num][j]>sortedDNAs[num][j+1]){
					char buffer=sortedDNAs[num][j];
					sortedDNAs[num][j]=sortedDNAs[num][j+1];
					sortedDNAs[num][j+1]=buffer;
					count++;
					isReverse=true;
				}
		    }
			// A pass without swaps means the copy is already sorted.
			if(isReverse==false)
				break;
	    }
		// Insertion sort on (index, count) rows, ascending by count. The strict
		// '>' leaves earlier strings in front of later ones with equal counts.
		int point=sortedIndexCount;
		while(point>=0&&sortedIndex[point][1]>count){
			sortedIndex[point+1][0]=sortedIndex[point][0];
			sortedIndex[point+1][1]=sortedIndex[point][1];
			point--;
		}
		point++;
		sortedIndex[point][1]=count;
		sortedIndex[point][0]=num;
		sortedIndexCount++;
	}
	for(int i=0;i<m;i++){
		cout<<DNAs[sortedIndex[i][0]]<<'\n';
	}

	return 0;
}

这里用到了C++数组深拷贝的知识：

int a[10];
int b[10];
//想把b数组里的值拷贝到a数组里，使用
memcpy(a,b,10*sizeof(int));

另一个例子：

//使用memcpy复制
unsigned char R[3][2];
unsigned char Test[2] = {2,3};
memcpy(&R[i][0], &Test[0], sizeof(Test));

当然，我也要去看看别人是怎么写的啦~

原来大家不用冒泡，根据题目的定义来计算混乱度，传统写法是这样子的呀。时间也跟我差不多。

//Memory Time 
//252K   16MS 

#include<iostream>
#include<algorithm>
using namespace std;

// One DNA record: the sequence text together with its inversion count.
class dna
{
	public:
		int num;       // inversion count of sq
		char sq[110];  // DNA sequence, stored as a C string
};
typedef dna DNAStr;

// Counts the inversions among the first len characters of s, considering only
// the letters A < C < G < T; any other character is ignored.
// The string is scanned right to left while keeping running totals of the
// A/C/G letters already seen, so each letter adds the number of strictly
// smaller letters to its right.
int InversionNumber(char* s,int len)
{
	int seenA=0;
	int seenC=0;
	int seenG=0;  // T is the largest letter, so its count is never needed
	int total=0;
	for(int pos=len-1;pos>=0;--pos)
	{
		const char base=s[pos];
		if(base=='A')
		{
			++seenA;  // A is the smallest letter: contributes no inversions
		}
		else if(base=='C')
		{
			++seenC;
			total+=seenA;
		}
		else if(base=='G')
		{
			++seenG;
			total+=seenA;
			total+=seenC;
		}
		else if(base=='T')
		{
			total+=seenA;
			total+=seenC;
			total+=seenG;
		}
	}
	return total;
}

int cmp(const void* a,const void* b)
{
	DNAStr* x=(DNAStr*)a;
	DNAStr* y=(DNAStr*)b;
	return (x->num)-(y->num);
}

int main(void)
{
	int n,m;
	while(cin>>n>>m)
	{
		DNAStr* DNA=new DNAStr[m];
		for(int i=0;i<m;i++)
		{
			cin>>DNA[i].sq;
			DNA[i].num = InversionNumber(DNA[i].sq,n);
		}
		qsort(DNA,m,sizeof(DNAStr),cmp);
		for(int j=0;j<m;j++)
			cout<<DNA[j].sq<<endl;
	}
	return 0;
}

另外，有一个解法，算法倒是一点不新奇，但是它使用了qsort。时间也跟我差不多。

#include <iostream>
#include <cstdlib>
#include <string>
using namespace std;
// One input string together with its inversion count.
struct DNA
{
    std::string s;  // the sequence as read from input
    int value;      // number of inversions counted in s
};
// Orders two DNA records by ascending inversion count: negative when a has
// fewer inversions than b, zero when equal, positive otherwise.
int cmp(const DNA *a, const DNA *b)
{
    const int lhs = a->value;
    const int rhs = b->value;
    return lhs - rhs;
}
int main()
{
    int n,m;
    while(cin>> m >>n)
    {
         DNA *it=new DNA[n];      
         for(int i=0;i!=n;i++)
         {
            cin >> it[i].s;
            it[i].value = 0;
            for(int j=0;j!=m;j++)
                for(int k=j+1;k!=m;k++)
                     if(it[i].s[j]>it[i].s[k])   it[i].value++;
         }
         qsort(it,n,sizeof(DNA), (int (*)(const void *, const void *))cmp);
         for(int i=0;i!=n;i++)
              cout << it[i].s <<endl ;  
         delete[] it;             
    }
    return 0;    
}

它是用结构体包含其字符串和对应的inversions个数，从而用快排把inversions和其对应的字符串一次排出。

这里为了加强对快排函数qsort格式的认识和理解，从百度上转来了qsort对7种类型数据的快排格式和一个实例：

七种qsort排序方法

<本文中排序都是采用的从小到大排序>

一、对int类型数组排序
int num[100];

// qsort comparator for int elements, ascending.
// Returns -1, 0 or 1 when *a is less than, equal to or greater than *b.
int cmp ( const void *a , const void *b ) { 
   const int x = *static_cast<const int *>(a);
   const int y = *static_cast<const int *>(b);
   // Compare instead of subtracting: x - y overflows when the operands are
   // far apart (e.g. INT_MAX and a negative value), which flips the sign.
   return (x > y) - (x < y);
} 
qsort(num,100,sizeof(num[0]),cmp);

二、对char类型数组排序（同int类型）
char word[100];

// qsort comparator for char elements, ascending.
// Returns -1, 0 or 1 when *a is less than, equal to or greater than *b.
int cmp( const void *a , const void *b ) { 
   // Both operands are single chars; the original read the right-hand one
   // through an int pointer, pulling in neighbouring bytes.
   const char x = *static_cast<const char *>(a);
   const char y = *static_cast<const char *>(b);
   return (x > y) - (x < y);
} 
qsort(word,100,sizeof(word[0]),cmp);

三、对double类型数组排序（特别要注意）
double in[100];

// qsort comparator for double elements, ascending.
// Returns -1, 0 or 1 when *a is less than, equal to or greater than *b.
int cmp( const void *a , const void *b ) { 
   const double x = *static_cast<const double *>(a);
   const double y = *static_cast<const double *>(b);
   // Equal values must compare as 0; returning -1 for both (a,b) and (b,a)
   // gives qsort an inconsistent ordering. Subtraction is avoided because
   // truncating a fractional difference to int would lose the sign.
   return (x > y) - (x < y);
} 
qsort(in,100,sizeof(in[0]),cmp);

四、对结构体一级排序

// Record sorted by its floating-point key `data`.
struct In { 
   double data; 
   int other; 
}s[100];
// qsort comparator: orders In records by ascending data. The key may be of
// other types as well; follow the matching example above for each type.
// Returns -1, 0 or 1; equal keys compare as 0 so the ordering is consistent.
int cmp( const void *a ,const void *b) { 
   const double x = static_cast<const In *>(a)->data;
   const double y = static_cast<const In *>(b)->data;
   return (x > y) - (x < y);
} 
qsort(s,100,sizeof(s[0]),cmp);

五、对结构体二级排序

// Record sorted on two integer keys.
struct In { 
   int x; 
   int y; 
}s[100]; 
// qsort comparator: ascending by x; records with equal x are ordered by
// descending y. Returns -1, 0 or 1.
int cmp( const void *a , const void *b ) { 
   const In *c = static_cast<const In *>(a);
   const In *d = static_cast<const In *>(b);
   // Comparisons instead of subtraction: c->x - d->x overflows for keys that
   // are far apart, which would reverse the order.
   if(c->x != d->x) return (c->x > d->x) - (c->x < d->x);
   return (d->y > c->y) - (d->y < c->y);
} 
qsort(s,100,sizeof(s[0]),cmp);

六、对字符串进行排序

// Record sorted by its string member.
struct In { 
   int data; 
   char str[100]; 
}s[100]; 
// qsort comparator: orders In records by the lexicographic (strcmp) order of
// str. Returns strcmp's result: negative, zero or positive.
int cmp ( const void *a , const void *b ) { 
   // a and b point at In objects, so the member is reached with a single
   // indirection; the original applied -> to an already dereferenced struct.
   const In *lhs = static_cast<const In *>(a);
   const In *rhs = static_cast<const In *>(b);
   return strcmp( lhs->str , rhs->str ); 
} 
qsort(s,100,sizeof(s[0]),cmp);

七、计算几何中求凸包的cmp

// qsort-style comparator used when building a convex hull.
// Compares two points relative to the reference point p[1] using the helpers
// calc() and dis(), which are defined outside this snippet.
// NOTE(review): calc() presumably returns a cross product and dis() a
// distance to p[1]; confirm against their definitions.
// Returns 1 or -1 only; it never returns 0.
int cmp(const void *a,const void *b){ // key comparator: sorts every point except point 1 by rotation angle 
   struct point *c=(point *)a; 
   struct point *d=(point *)b; 
   if( calc(*c,*d,p[1]) < 0) return 1; 
   else if( !calc(*c,*d,p[1]) && dis(c->x,c->y,p[1].x,p[1].y) < dis(d->x,d->y,p[1].x,p[1].y)) // when calc() is zero (points on one line, per the original note), the farther point goes first 
   return 1; 
   else return -1; 
}

PS:
其中的qsort函数包含在<stdlib.h>的头文件里，strcmp包含在<string.h>的头文件里

对浮点型快排实例:源程序, vc6通过的:

#include <stdio.h>
#include <stdlib.h>
// Comparator for float elements; defined after main.
int fcmp(const float*,const float*);
// Callback signature that qsort expects; fcmp is cast to this type at the call site.
typedef int(*QSORT_UDF)(const void *,const void *);
// Sorts a fixed set of ten floats in ascending order with qsort and prints
// them on one line, each formatted as " %3.2f ".
int main(){
   float samples[] = {32.1,456.87,332.67,442.0,98.12,451.79,340.12,54.55,99.87,72.5};
   const size_t count = sizeof(samples)/sizeof(samples[0]);
   // fcmp takes typed pointers, so it is cast to the callback type qsort wants.
   qsort(samples,count,sizeof(samples[0]),(QSORT_UDF)fcmp);
   for(const float *it=samples; it!=samples+count; ++it)
      printf(" %3.2f ",*it);
   return 0;
}
// Three-way comparison of two floats for ascending order:
// 1 when *a > *b, -1 when *a < *b, 0 otherwise.
int fcmp(const float *a,const float *b){
   const float lhs = *a;
   const float rhs = *b;
   return lhs > rhs ? 1 : (lhs < rhs ? -1 : 0);
}