#翻译过程的if,else写的极其拙劣,以后有时间有了更好的方法再来重写
#B站未琢三个碱基为一体遍历是一种比较好的思路,可以借鉴
import re
# Load every line of the input file. Header lines (those starting with '>')
# are filtered out later, when the sequence is assembled.
with open("../examples/ros_bio18_ORF.txt") as f:
    file = f.readlines()
# Codon table: maps each 3-base RNA codon to its one-letter amino acid code,
# or to the string 'Stop' for the three stop codons.
table = {}
# RNA codon table as text: four "codon residue" pairs per row.
codon = """UUU F CUU L AUU I GUU V
UUC F CUC L AUC I GUC V
UUA L CUA L AUA I GUA V
UUG L CUG L AUG M GUG V
UCU S CCU P ACU T GCU A
UCC S CCC P ACC T GCC A
UCA S CCA P ACA T GCA A
UCG S CCG P ACG T GCG A
UAU Y CAU H AAU N GAU D
UAC Y CAC H AAC N GAC D
UAA Stop CAA Q AAA K GAA E
UAG Stop CAG Q AAG K GAG E
UGU C CGU R AGU S GGU G
UGC C CGC R AGC S GGC G
UGA Stop CGA R AGA R GGA G
UGG W CGG R AGG R GGG G """
codon = codon.split("\n")
# Convert the text table into the dictionary.
for line in codon:
    # Splitting on any whitespace tolerates uneven column spacing and blank
    # rows; tokens alternate codon, residue, codon, residue, ...
    fields = line.split()
    table.update(zip(fields[0::2], fields[1::2]))
# Assemble the DNA sequence from the input lines, skipping '>' header lines.
# Maps each DNA base to the RNA base that pairs with it.
base = {'A': 'U', 'G': 'C', 'C': 'G', 'T': 'A'}
chunks = []
for line in file:
    # strip() also removes a trailing '\r' from CRLF files, which would
    # otherwise end up in the sequence and fail the base lookup below.
    line = line.strip()
    if not line or line.startswith('>'):
        continue
    chunks.append(line)
dna = ''.join(chunks)
# RNA reading of the given strand: same bases with T written as U.
mrna = dna.replace('T', 'U')
# RNA reading of the opposite strand: complement every base, in reverse order.
r_mrna = ''.join(base[b] for b in reversed(dna))
rna = [mrna, r_mrna]
# Scan both RNA strands in each of the three reading frames and collect every
# protein that runs from a start codon ('M') to the next in-frame stop codon.
def _frame_proteins(seq, frame, codon_table):
    """Return the proteins of all complete ORFs of *seq* in one reading frame.

    seq         -- RNA string made of codons present in *codon_table*
    frame       -- offset (0, 1 or 2) of the first codon to read
    codon_table -- mapping from codon to residue letter or 'Stop'

    Every 'M' that is followed by an in-frame stop codon yields one protein,
    so overlapping ORFs sharing a stop codon are all reported. A trailing
    run of codons with no stop is discarded. An incomplete final codon is
    ignored. Raises KeyError for a codon missing from *codon_table*.
    """
    proteins = []
    segment = []  # residues read since the previous stop codon
    for pos in range(frame, len(seq) - 2, 3):
        residue = codon_table[seq[pos:pos + 3]]
        if residue != 'Stop':
            segment.append(residue)
            continue
        # A stop closes every ORF opened by an 'M' inside this segment.
        peptide = ''.join(segment)
        proteins.extend(
            peptide[k:] for k, r in enumerate(segment) if r == 'M'
        )
        segment = []
    return proteins


all_pro = []
for strand in rna:
    for frame in range(3):
        all_pro.extend(_frame_proteins(strand, frame, table))
# Remove duplicates; dict keys keep first-seen order, so output is reproducible.
all_pro = list(dict.fromkeys(all_pro))
for e in all_pro:
    print(e)
# Rosalind problem 18 — ros_bio18_ORF
# (scraped blog footer) Latest recommended article published 2021-08-17 22:40:00