# 9.1 检测二硫键模式 (detect disulfide-bond patterns)
import re
# Motif searched for in every sequence: a C, one to four arbitrary residues,
# another C, and then one more residue of any kind.
DISULFIDE_PATTERN = re.compile(r'C.{1,4}C.')


def parse_fasta(lines):
    """Collect a multi-record FASTA stream into ``{header_line: sequence}``.

    Args:
        lines: Any iterable of text lines (an open file or a list of strings).

    Returns:
        A dict in input order. Each key is a header line exactly as read
        (starting with ``>`` and keeping its trailing newline); each value is
        the record's sequence lines stripped and joined into one string.
        Sequence text appearing before any header is stored under the
        fallback key ``'A'``. Empty input yields an empty dict.
    """
    records = {}
    header = 'A'  # fallback key for sequence text that precedes any header
    pieces = []
    seen_line = False
    for line in lines:
        if line.startswith('>'):
            # A header closes the previous record, except on the very first
            # line, where there is nothing to close yet.
            if seen_line:
                records[header] = ''.join(pieces)
                pieces = []
            header = line
        else:
            pieces.append(line.strip())
        seen_line = True
    if seen_line:
        records[header] = ''.join(pieces)
    return records


def lowercase_motifs(sequence, pattern=DISULFIDE_PATTERN):
    """Lower-case every non-overlapping match of ``pattern`` in ``sequence``.

    The substitution works on the matched spans themselves, so identical text
    that occurs elsewhere (for instance inside another match) is not touched
    by mistake and no match is left partially upper-case.

    Args:
        sequence: The sequence string to scan.
        pattern: A compiled regular expression; defaults to the disulfide motif.

    Returns:
        A ``(marked_sequence, match_count)`` tuple.
    """
    return pattern.subn(lambda hit: hit.group(0).lower(), sequence)


def report_disulfide_motifs(path='genome.fasta'):
    """Print each record of the FASTA file at ``path`` that contains the motif.

    Matching stretches are shown in lower case so they stand out. Records
    without a match are not printed.
    """
    # The context manager closes the file even if parsing raises.
    with open(path, 'r') as handle:
        records = parse_fasta(handle)
    for header, sequence in records.items():
        marked, hits = lowercase_motifs(sequence)
        if hits:
            print(header, marked)


if __name__ == '__main__':
    report_disulfide_motifs()
# 9.2 解析白鲸 (parse the text of Moby Dick)
from collections import Counter

# Characters deleted from the text before it is split into words. The
# apostrophe is escaped explicitly: writing it as two bare quotes inside a
# single-quoted literal merely ends one literal and starts another, so the
# apostrophe itself would never be part of the set.
PUNCTUATION = ',.?:"()\'!-;'


def count_words(text):
    """Count whitespace-separated words in ``text``, ignoring case.

    The text is lower-cased and every character in ``PUNCTUATION`` is removed
    (not replaced by a space) before splitting.

    Args:
        text: The raw text to analyse.

    Returns:
        A ``collections.Counter`` mapping each word to its occurrence count,
        in order of first appearance.
    """
    cleaned = text.lower().translate(str.maketrans('', '', PUNCTUATION))
    return Counter(cleaned.split())


def words_more_frequent_than(counts, targets=('captain', 'whale')):
    """List the words that occur more often than at least one target word.

    Args:
        counts: Mapping of word to occurrence count.
        targets: Words to compare against; a target missing from ``counts``
            is treated as occurring zero times.

    Returns:
        ``(word, count)`` tuples sorted by descending count (ties keep the
        mapping's order), limited to counts strictly greater than the
        smallest target count.
    """
    # "More frequent than captain OR more frequent than whale" is the same as
    # "more frequent than whichever of the two is rarer".
    threshold = min(counts.get(word, 0) for word in targets)
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return [entry for entry in ranked if entry[1] > threshold]


def report_moby_dick(path='Moby Dick.txt'):
    """Print the words outranking 'captain' or 'whale', then both tallies."""
    # The context manager closes the file even if reading or counting raises.
    with open(path, 'r') as handle:
        counts = count_words(handle.read())
    for entry in words_more_frequent_than(counts):
        print(entry)
    # Counter returns 0 for absent words, so a text without either word
    # reports zero instead of raising KeyError.
    print('captain出现{}次,whale出现{}次'.format(counts['captain'], counts['whale']))


if __name__ == '__main__':
    report_moby_dick()
# 9.3 搜索人类激酶中的磷酸化位点 (search for phosphorylation sites in human kinases)
import re
# Open the FASTA input for this exercise in text read mode.
fo=open('abc.fasta','r')
# Turn the multi-sequence file into a dictionary, joining each record's
# sequence lines into one continuous string.
# Read the whole file into memory as a list of lines.
txt=fo.readlines()
# State for the parsing loop that follows. The loop body is not visible in
# this part of the file, so the roles below are assumptions to confirm.
flag=True  # presumably marks "no line processed yet" -- TODO confirm
seq=''  # presumably accumulates the current record's sequence text
# NOTE(review): this name shadows the built-in dict type for the rest of the module.
dict={}
title='A'  # presumably the fallback key used until a header line is read
for line in