- 提取序列ID
def extract_ids(fasta_file):
    """Return the record IDs found in a FASTA file, in file order.

    Args:
        fasta_file: Path of the FASTA file to read; it may hold several
            sequences.

    Returns:
        A list with the ``id`` attribute of every record produced by
        ``SeqIO.parse`` for the file.
    """
    with open(fasta_file, "r") as fasta_handle:
        # For FASTA input the first word of each header line serves as
        # both the record's id and its name.
        return [entry.id for entry in SeqIO.parse(fasta_handle, "fasta")]
- 对齐ID
def alphaname_to_fastaname(ids):
    """Map generated AlphaFold model names to the given sequence IDs.

    Counts the entries in the current working directory that match
    ``alphafold_*.pdb`` and generates the names ``alphafold_0`` through
    ``alphafold_<count - 1>``. Only the number of matches is used; the
    generated names are not checked against the actual file names.

    Args:
        ids: Sequence IDs to attach to the models. Presumably ordered so
            that position ``i`` belongs to ``alphafold_i`` -- confirm
            against the caller.

    Returns:
        A dict pairing each generated name with the ID at the same
        position. Pairing stops at the shorter of the two sequences, so
        surplus models or surplus IDs are silently left out.
    """
    # Imported locally so the function carries its own dependency.
    import glob

    # Count matches in-process instead of parsing `ls ... | wc -l` output:
    # no shell is spawned, it works where those tools are missing, and an
    # empty match simply yields zero models.
    model_count = len(glob.glob('alphafold_*.pdb'))
    alpha_names = [f'alphafold_{index}' for index in range(model_count)]

    # Pair each generated AlphaFold name with the corresponding FASTA ID.
    return dict(zip(alpha_names, ids))