#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import requests
import urllib.request
import os
# Scrape the IJCAI 2017 proceedings index page and download every paper PDF
# into LOCAL_DIR, naming each file after the paper's title.

# Index page of the proceedings; PDF hrefs found on it are resolved against it.
BASE_URL = 'http://www.ijcai.org/proceedings/2017/'
# Target directory (Windows path). Backslashes are escaped explicitly so the
# literal does not rely on the invalid escape sequence "\I".
LOCAL_DIR = 'C:\\IJCAI2017\\'
# Seconds to wait for the index page; without a timeout requests may block
# indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

# Captures only the href value of the anchor immediately followed by the text
# "PDF". The character class cannot cross a quote, so a match can never start
# at an earlier, unrelated href (which previously required slicing the match).
PDF_LINK_RE = re.compile(r"""href=["']([^"'<>\s]+pdf)["']>PDF""")
# Text between the `"title">` marker and the closing </div>.
TITLE_RE = re.compile(r'(?<="title">).+?(?=</div>)')
# Characters that are not allowed in Windows file names.
RESERVED_CHARS_RE = re.compile(r'[<>:"/\\|?*]')


def extract_pdf_links(html):
    """Return the href value of every link in *html* labelled ``PDF``."""
    return PDF_LINK_RE.findall(html)


def extract_titles(html):
    """Return the text of every ``"title">...</div>`` element in *html*."""
    return TITLE_RE.findall(html)


def sanitize_file_name(title):
    """Turn a paper title into a string usable as a Windows file name.

    The ' (Extended Abstract)' suffix is dropped and every reserved character
    (``< > : " / \\ | ? *``) is replaced with an underscore.
    """
    name = title.replace(' (Extended Abstract)', '')
    return RESERVED_CHARS_RE.sub('_', name)


def main():
    """Download all PDFs listed on the proceedings index page.

    Raises:
        requests.RequestException: if the index page cannot be fetched or
            the server answers with an error status.
    """
    # Local import: the file's top-level imports only bring in urllib.request.
    from urllib.parse import urljoin

    response = requests.get(BASE_URL, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of silently parsing an error page.
    response.raise_for_status()
    html = response.text

    links = extract_pdf_links(html)
    titles = extract_titles(html)
    if len(links) != len(titles):
        print('warning: found {} PDF links but {} titles; '
              'file names may not match their papers'.format(len(links), len(titles)))

    # zip() stops at the shorter list, so a count mismatch cannot raise
    # IndexError half-way through the run.
    papers = list(zip(links, titles))
    total = len(papers)

    os.makedirs(LOCAL_DIR, exist_ok=True)

    for index, (href, title) in enumerate(papers, start=1):
        file_path = os.path.join(LOCAL_DIR, sanitize_file_name(title) + '.pdf')
        print('[{}/{}] Downloading -> {}'.format(index, total, file_path))
        try:
            # urljoin handles both relative ("0001.pdf") and absolute hrefs.
            urllib.request.urlretrieve(urljoin(BASE_URL, href), file_path)
        except OSError as err:
            # URLError/HTTPError/ContentTooShortError are OSError subclasses;
            # report the failure and carry on with the remaining papers.
            print('    download failed: {}'.format(err))

    print("all download finished")


# Run only when executed as a script, so importing the module does not
# trigger network traffic.
if __name__ == '__main__':
    main()
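# Written for Python 3.5; downloads may be fairly slow depending on network speed.
# IJCAI 2017 paper download script (Python).
# (Source page note: "latest recommended article" published 2022-09-25 15:51:38.)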