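# In research work it is sometimes necessary to batch-crawl the static (or
# dynamic) data predicted by a web server.
# Taking phosphorylation-site information as an example, this script
# implements that process in Python.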
import re
import urllib
import numpy as np
from bs4 import BeautifulSoup
def crawl_web_phosp(seq_all):
url = "http://www.dabi.temple.edu/disphos/pred/predict"
headers = {
"User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
"Host": "www.dabi.temple.edu",
"Origin": "http://www.dabi.temple.edu",
"Referer": "http://www.dabi.temple.edu/disphos/",
"Content-Type": "multipart/form-data; boundary=----WebKitFormBoundarykgdpT9IlfSthlGa3",
"Accept-Encoding": "gzip, deflate",