import re
table = {}  # codon table: RNA codon -> one-letter amino acid code, or 'Stop'
num = 0  # current index into the mRNA string
protein = ''  # translated protein sequence

# RNA codon table as text: each row holds four "codon amino-acid" pairs.
codon = """UUU F CUU L AUU I GUU V
UUC F CUC L AUC I GUC V
UUA L CUA L AUA I GUA V
UUG L CUG L AUG M GUG V
UCU S CCU P ACU T GCU A
UCC S CCC P ACC T GCC A
UCA S CCA P ACA T GCA A
UCG S CCG P ACG T GCG A
UAU Y CAU H AAU N GAU D
UAC Y CAC H AAC N GAC D
UAA Stop CAA Q AAA K GAA E
UAG Stop CAG Q AAG K GAG E
UGU C CGU R AGU S GGU G
UGC C CGC R AGC S GGC G
UGA Stop CGA R AGA R GGA G
UGG W CGG R AGG R GGG G """
codon = codon.split("\n")

# Convert the codon table text into the `table` dictionary.
# The pattern is loop-invariant, so it is compiled once up front.
pattern = re.compile(r'(.{3} \S+)\s*(.{3} \S+)\s*(.{3} \S+)\s*(.{3} \S+)')
for line in codon:
    if not line.strip():
        # Tolerate blank rows instead of failing on them.
        continue
    m = pattern.match(line)
    if m is None:
        raise ValueError("malformed codon table row: %r" % line)
    # Walk only the four captured pairs; group 0 is the whole row and must
    # not be treated as a pair.
    for pair in m.groups():
        single = pair.split(" ")
        table[single[0]] = single[1]
# Read the mRNA sequence from the input file.
with open("../examples/ros_bio8_PROT.txt") as f:
    # Remove every whitespace character (trailing newline, wrapped lines) so
    # that slicing in steps of three always lands on codon boundaries.
    mrna = "".join(f.read().split())

# Translate the mRNA into a protein, codon by codon, until a stop codon.
# Only complete codons are translated: a trailing fragment of one or two
# bases is ignored rather than looked up (which would raise KeyError).
usable_length = len(mrna) - len(mrna) % 3
residues = []
while num < usable_length:
    amino_acid = table[mrna[num:num + 3]]  # KeyError here means an unknown codon
    if amino_acid == 'Stop':
        break
    residues.append(amino_acid)
    num += 3
# Join once at the end instead of growing the string inside the loop.
protein = protein + "".join(residues)
print(protein)
# 09-26