#!/usr/bin/python
# -*- coding: utf8 -*-
# @Time : 2019/10/23
# @Author : zhang
import requests
from urllib.request import urlopen
from bs4 import BeautifulSoup
import os
def parameters(pathsave, last, path):
    """Download every ``.mat`` and ``.txt`` file linked from one index page.

    Fetches the page at ``path + last``, collects all hyperlinks, and saves
    each matching file under the local directory ``pathsave + last``
    (created if missing).

    Parameters
    ----------
    pathsave : str
        Local directory prefix under which downloads are stored.
    last : str
        Sub-path segment appended to both the remote base URL and the
        local save path (presumably ends with a separator — TODO confirm
        against the caller).
    path : str
        Base URL of the remote site.

    Returns
    -------
    None — side effects only (directory creation and file writes).
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(path + last) as page:
        bs = BeautifulSoup(page, 'html.parser')  # parse the index page
    hyperlinks = bs.find_all('a')  # every <a> tag on the page

    pathsavenew = pathsave + last
    if not os.path.exists(pathsavenew):
        os.makedirs(pathsavenew)

    for anchor in hyperlinks:
        hh = anchor.get('href')
        # Guard: <a> tags without an href yield None, and the original
        # `'.mat' in hh` would raise TypeError on it.
        if not hh:
            continue
        # The .mat and .txt branches were duplicated verbatim; one
        # combined branch performs the identical download/save.
        if '.mat' in hh or '.txt' in hh:
            resp = requests.get(path + last + hh)
            with open(pathsavenew + hh, "wb") as f:
                f.write(resp.content)
def RawDat
网络爬虫 爬取心血管数据
最新推荐文章于 2024-07-24 03:41:39 发布
本文介绍如何利用Python进行网络爬虫,从指定网站抓取心血管疾病的相关数据集,包括CVRG数据,为医学研究提供数据支持。
摘要由CSDN通过智能技术生成