Swiss-model的API使用，批量同源建模

最新推荐文章于 2024-12-10 20:58:01 发布

爱穿豹纹的阿姨

最新推荐文章于 2024-12-10 20:58:01 发布

阅读量825

点赞数 5

文章标签： python linux

本文链接：https://blog.csdn.net/leeleeleeastro/article/details/141935503

版权

在alphafold模型流行的今天，传统的同源结构建模预测仍然值得重视。

本文参考 SWISS-MODEL 官方文档，使用 Python 通过其 API 提交建模任务，实现批量同源建模。

参考：

SWISS-MODEL 帮助文档（Modelling API，expasy.org）：https://swissmodel.expasy.org/docs/help#modelling_api

利用Swiss-model API进行蛋白序列提交及蛋白结构建模（CSDN博客）：https://blog.csdn.net/weixin_44065416/article/details/129056843

多一句没有，少一句不行，直接上代码：

import requests
import time
import os
from Bio import SeqIO


def swiss_model(token, target_seq, seq_id, outpath):
    """Build a homology model for one sequence through the SWISS-MODEL API.

    Submits ``target_seq`` to the ``automodel`` endpoint, polls the project
    summary every 10 seconds until it reports ``COMPLETED`` or ``FAILED``,
    appends the best score to ``model_score.log`` inside ``outpath`` and
    downloads the model with the highest ``qmean_global.avg_local_score``
    to ``<seq_id>.pdb.gz`` inside ``outpath``.

    Args:
        token: SWISS-MODEL API token, sent in the ``Authorization`` header.
        target_seq: Target sequence string to model.
        seq_id: Sequence identifier; used as the project title and as the
            stem of the output file name.
        outpath: Existing directory that receives the model and the log.

    Raises:
        requests.HTTPError: If any request returns an HTTP error status.
        RuntimeError: If the project ends as ``FAILED`` or completes
            without any model.
    """
    import shutil  # local import: only needed for the streamed download

    auth_headers = {"Authorization": f"Token {token}"}

    # Start the automodel project.
    response = requests.post(
        "https://swissmodel.expasy.org/automodel",
        headers=auth_headers,
        json={
            "target_sequences": target_seq,
            "project_title": seq_id,
        },
        timeout=10)
    response.raise_for_status()
    project_id = response.json()["project_id"]
    summary_url = (
        f"https://swissmodel.expasy.org/project/{project_id}/models/summary/")

    # Poll until the project reaches a terminal state.
    while True:
        time.sleep(10)
        # A timeout keeps a stalled connection from hanging the whole batch.
        response = requests.get(summary_url, headers=auth_headers, timeout=30)
        response.raise_for_status()
        summary = response.json()
        status = summary["status"]
        print(f'Job status is now{status}!  id:{seq_id}')
        if status in ("COMPLETED", "FAILED"):
            break

    if status == "FAILED":
        raise RuntimeError(f"SWISS-MODEL project {project_id} failed for {seq_id}")

    # Each model looks like:
    # {'model_id': '01', 'status': 'COMPLETED', 'gmqe': 0.05,
    #  'qmean_global': {'avg_local_score': 0.71},
    #  'coordinates_url': 'https://swissmodel.expasy.org/project/WbWWJA/models/01.pdb.gz'}
    models = summary["models"]
    if not models:
        raise RuntimeError(
            f"SWISS-MODEL project {project_id} returned no models for {seq_id}")

    # max() keeps the first model on ties, like the original list lookup did.
    best_model = max(
        models, key=lambda model: model["qmean_global"]["avg_local_score"])
    max_score = best_model["qmean_global"]["avg_local_score"]

    # Record the best average local score of every modelled sequence.
    with open(os.path.join(outpath, "model_score.log"), "a") as outF:
        outF.write(f"SeqID: {seq_id}, Score: {max_score}\n")

    # Download in-process instead of shelling out to wget: sequence ids often
    # contain shell metacharacters such as "|", and a failed download is now
    # reported as an exception instead of being silently ignored.
    safe_name = seq_id.replace(os.sep, "_")  # keep the file inside outpath
    model_file = os.path.join(outpath, safe_name + ".pdb.gz")
    with requests.get(
            best_model["coordinates_url"], stream=True, timeout=60) as download:
        download.raise_for_status()
        with open(model_file, "wb") as out_file:
            # Copy the raw body so the gzip file is stored exactly as served.
            shutil.copyfileobj(download.raw, out_file)


def swiss_model_single_seq(token, inf_fa, outpath):
    """Model every record of a FASTA file, one submission at a time.

    Each record is passed to ``swiss_model``. A record whose modelling
    raises is reported on stdout and appended to ``Failed.log`` inside
    ``outpath``; processing then continues with the next record.

    Args:
        token: API token issued by SWISS-MODEL.
        inf_fa: Path of the input file in FASTA format.
        outpath: Output directory for models and log files.
    """
    for record in SeqIO.parse(inf_fa, "fasta"):
        target_seq = str(record.seq)
        seq_id = record.id
        try:
            swiss_model(token=token, target_seq=target_seq, seq_id=seq_id, outpath=outpath)
        except Exception as err:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit can still stop the batch.
            print(f"Modelling failed for {seq_id}: {err!r}")
            with open(os.path.join(outpath, "Failed.log"), "a") as outF:
                outF.write("SeqID: %s, Score: %s\n" % (seq_id, "Failed"))

if __name__ == '__main__':
    # Script entry point: fill in the placeholders below before running.
    token = 'your token'
    inf_fa = 'your fasta file'  # input FASTA file
    outpath = 'out/'
    # exist_ok=True avoids the race between checking for the directory and
    # creating it, and fails early if the path exists but is not a directory.
    os.makedirs(outpath, exist_ok=True)
    swiss_model_single_seq(token, inf_fa, outpath)

1.需要的包：