实验题目2:寻找最长公共子序列(LCS)(10.25日实验) 要求:
(1)分别设计暴力法和动态规划算法,
(2)写出代码,并调试成功
(3)随机产生由A,C,G,T四种字符组成的两条序列,运行(1)中的两种算法,输出结果(长度和子序列),并比较运行时长
import random
import time
def LCS(str1, str2, len1, len2, path):
    """Compute the longest-common-subsequence length and record back-pointers.

    A (len1 + 1) x (len2 + 1) dynamic-programming table is built whose rows
    follow ``str1`` and whose columns follow ``str2``; row 0 and column 0
    stay 0 and represent the empty prefix.

    Args:
        str1: First sequence; items 0 .. len1 - 1 are read.
        str2: Second sequence; items 0 .. len2 - 1 are read.
        len1: Number of items of ``str1`` to consider.
        len2: Number of items of ``str2`` to consider.
        path: Pre-allocated table indexed as ``path[i][j]`` for
            1 <= i <= len1 and 1 <= j <= len2. It is written in place:
            0 means the cell came from the diagonal (items matched),
            1 means it came from the cell above, -1 from the cell to the left.

    Returns:
        The length of the longest common subsequence of the two prefixes.
    """
    table = [[0] * (len2 + 1) for _ in range(len1 + 1)]
    for i in range(1, len1 + 1):
        for j in range(1, len2 + 1):
            # Sequences are 0-based while the table is 1-based.
            if str1[i - 1] == str2[j - 1]:
                table[i][j] = table[i - 1][j - 1] + 1
                path[i][j] = 0
            elif table[i - 1][j] >= table[i][j - 1]:
                # Ties are resolved in favour of the cell above.
                table[i][j] = table[i - 1][j]
                path[i][j] = 1
            else:
                table[i][j] = table[i][j - 1]
                path[i][j] = -1
    return table[len1][len2]
def stringpath(str1, path, i, j, sub):
    """Append the subsequence encoded in ``path`` to ``sub`` in forward order.

    Starting at cell (i, j), the back-pointers are followed until row 0 or
    column 0 is reached: 0 moves diagonally and selects ``str1[i - 1]``,
    -1 moves left, 1 moves up. Any other value stops the walk.

    The walk is iterative rather than recursive so that long sequences do
    not run into Python's recursion limit.

    Args:
        str1: Sequence whose items are emitted on diagonal moves.
        path: Back-pointer table using the 0 / 1 / -1 encoding above.
        i: Starting row.
        j: Starting column.
        sub: List that receives the selected items; extended in place.
    """
    picked = []  # collected back-to-front while walking towards (0, 0)
    while i != 0 and j != 0:
        step = path[i][j]
        if step == 0:
            picked.append(str1[i - 1])
            i -= 1
            j -= 1
        elif step == -1:
            j -= 1
        elif step == 1:
            i -= 1
        else:
            break
    sub.extend(reversed(picked))
def allsubstring(allsub, str2, len2, i, temp):
    """Brute-force helper: collect every subsequence of ``str2[i:len2]``.

    Each item from position ``i`` onward is either appended to ``temp`` or
    skipped, so a call with ``i == 0`` adds 2 ** len2 entries to ``allsub``.
    The "take the item" branch is explored before the "skip it" branch.

    Args:
        allsub: List that receives the generated subsequences (in place).
        str2: Source sequence; its items must support ``temp + item``.
        len2: Number of items of ``str2`` to consider.
        i: Index of the next item to decide on.
        temp: Subsequence accumulated so far.
    """
    # ">=" rather than "==" so a start index past the end terminates cleanly.
    if i >= len2:
        allsub.append(temp)
        return
    allsubstring(allsub, str2, len2, i + 1, temp + str2[i])  # take str2[i]
    allsubstring(allsub, str2, len2, i + 1, temp)  # skip str2[i]
def comparesub(str1, str3):
    """Check whether ``str3`` is a subsequence of ``str1``.

    Args:
        str1: Sequence to search in.
        str3: Candidate subsequence.

    Returns:
        1 if every item of ``str3`` occurs in ``str1`` in the same relative
        order (an empty ``str3`` always qualifies), otherwise 0.
    """
    remaining = iter(str1)
    # "item in iterator" consumes the iterator up to and including the first
    # match, so each successive lookup can only continue to the right.
    return 1 if all(item in remaining for item in str3) else 0
if __name__ == '__main__':
    # Experiment driver: generate two random A/G/C/T sequences, solve the LCS
    # problem with dynamic programming and with brute force, and print the
    # result and elapsed time of each approach.
    s = ['A', 'G', 'C', 'T']
    sub = []  # subsequence recovered by the dynamic-programming solution
    allsub = []  # every subsequence of str2 (brute force)
    len1 = random.randint(10, 20)  # random sequence lengths
    len2 = random.randint(10, 20)
    str1 = [random.choice(s) for _ in range(len1)]
    str2 = [random.choice(s) for _ in range(len2)]

    # perf_counter() is used instead of time() because the DP run is short
    # enough that a coarse wall clock can report an elapsed time of 0.0.
    time1 = time.perf_counter()
    path = [[0] * (len2 + 1) for _ in range(len1 + 1)]  # back-pointer table
    sublength = LCS(str1, str2, len1, len2, path)
    stringpath(str1, path, len1, len2, sub)
    time2 = time.perf_counter()
    print("str1为:", str1)
    print("str2为:", str2)
    print("LCS算法")
    print("字串长度为:", sublength)
    print("公共子串为:", sub)
    print("LCS所用时间为:", time2 - time1)

    print("暴力算法")
    time1 = time.perf_counter()
    best_len, index = -1, -1
    allsubstring(allsub, str2, len2, 0, "")
    # Test every subsequence of str2 against str1; on equal length the later
    # candidate wins.
    for k, candidate in enumerate(allsub):
        if comparesub(str1, candidate) == 1 and best_len <= len(candidate):
            best_len = len(candidate)
            index = k
    time2 = time.perf_counter()
    print("字串长度为:", best_len)
    print("公共子串为:", allsub[index])
    print("暴力所用时间为:", time2 - time1)
运行结果:
str1为: ['C', 'G', 'T', 'T', 'T', 'G', 'T', 'A', 'G', 'T', 'A', 'G', 'G', 'T']
str2为: ['G', 'G', 'C', 'G', 'C', 'C', 'T', 'T', 'T', 'G', 'T', 'G', 'C', 'G', 'A', 'C', 'T', 'G', 'G']
LCS算法
字串长度为: 11
公共子串为: ['C', 'G', 'T', 'T', 'T', 'G', 'T', 'A', 'T', 'G', 'G']
LCS所用时间为: 0.0
暴力算法
字串长度为: 11
公共子串为: CGTTTGTATGG
暴力所用时间为: 10.024006843566895