下载的 FASTA 文件中,部分序列的注释行缺少物种名:先通过 Python 脚本从对应的 GenPept 格式文件中提取物种名并添加至注释行,再通过脚本去除同一物种中较短的重复序列。
from multivalued_dict_package import * #pip install multivalued_dict
# Load the FASTA file as a list of lines, dropping the trailing newline /
# carriage-return characters from every line while reading.
with open('sequence.fasta') as fasta_file:
    fasta_line_list = [line.rstrip('\n\r') for line in fasta_file]
# Load the GenPept file as a list of raw lines; unlike the FASTA lines,
# these keep their line endings exactly as readlines() returns them.
with open('sequence.gp') as gp_file:
    gp_line_list = gp_file.readlines()
for i in range(len(fasta_line_list)): #遍历fasta文件每一行
if fasta_line_list[i].startswith('>') and (not fasta_line_list