用 Python 获取 DeepMind 发布的文章

以下提供了两种下载方法:一种是用 requests 库直接下载(见 download 函数),另一种是通过 COM 接口调用迅雷下载。

#coding:utf-8

from lxml import etree
import time
import requests
import progressbar  
import win32com
from win32com.client import Dispatch


# Browser-like request headers passed to requests.get() so the site sees a
# regular Chrome client. The two literals are joined with a space between
# "Gecko)" and "Chrome/"; without it the User-Agent value is malformed.
header={
	'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
	'Chrome/67.0.3396.87 Safari/537.36'
}

# First look up on the site how many listing pages exist; that is where the
# 19 pages (1..19) below come from.

urls=list(map('https://deepmind.com/research/publications/?page={}'.format,
		range(1,20)))
# Directory the downloaded PDFs are saved into.
path=r'I:\DeepMind'

# COM automation object used further down to queue downloads via AddTask()
# and CommitTasks(). Presumably "ThunderAgent.Agent64.1" is the ProgID of the
# 64-bit Thunder (Xunlei) download agent -- confirm against the installed
# client; Dispatch() runs at import time and needs that client registered.
o = Dispatch("ThunderAgent.Agent64.1")
def download(title_name,download_src) :
	"""Download ``download_src`` to ``<path>/<title_name>.pdf`` with a progress bar.

	Args:
		title_name: Title of the publication; used as the file name after
			characters that are illegal in Windows file names are replaced
			with ``_``.
		download_src: URL of the file to fetch.

	Returns:
		None. The file is written into the module-level ``path`` directory.

	The response is streamed in 8 KiB chunks instead of being loaded fully
	into memory and written byte by byte. If the server sends no usable
	``Content-Length`` the download still proceeds, only without a progress
	bar.
	"""
	import os
	import re

	# Titles frequently contain ':' or '?', which open() rejects on Windows.
	safe_name = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', title_name).strip()
	# os.path.join inserts the separator that plain `path + name` left out.
	target = os.path.join(path, safe_name + '.pdf')

	with requests.get(download_src, headers=header, stream=True) as data:
		length_header = data.headers.get("Content-Length")
		if length_header and length_header.isdigit():
			total_length = int(length_header)
		else:
			total_length = 0

		pbar = None
		if total_length > 0:
			widgets = ['Progress: ', progressbar.Percentage(), ' ',
				progressbar.Bar(marker='#', left='[', right=']'), ' ',
				progressbar.ETA(), ' ', progressbar.FileTransferSpeed()]
			pbar = progressbar.ProgressBar(widgets=widgets, maxval=total_length).start()

		count = 0
		with open(target, 'wb') as f:
			for chunk in data.iter_content(chunk_size=8192):
				if not chunk:
					continue
				f.write(chunk)
				count += len(chunk)
				if pbar is not None:
					# A compressed body can decode to more bytes than
					# Content-Length announced; clamp so update() never
					# receives a value above maxval.
					pbar.update(min(count, total_length))

		if pbar is not None:
			pbar.finish()
	return
	
# Walk every listing page, pick a download link for each article and hand it
# to Thunder. `count` is the number of articles seen across all pages; it
# must be initialised before the loop (the original incremented it, and
# `page`, without ever defining them, which raised NameError immediately).
count=0

# Link candidates in order of preference: the second footer link first, then
# the first footer link as a fallback.
link_xpaths=('div/div[2]/footer/a[2]/@href','div/div[2]/footer/a[1]/@href')

for page,url in enumerate(urls,1) :
	# Send the same browser-like headers used for the file downloads.
	res=requests.get(url,headers=header)
	selector=etree.HTML(res.text)
	second_urls=selector.xpath('//div[@class="listing--list-items"]/article')

	for second_url in second_urls:
		count=count+1
		titles=second_url.xpath('div/div[2]/header/div/h1')
		title_name=(titles[0].text or '').strip() if titles else ''
		if not title_name:
			# Without a title there is no file name to save under; skip the
			# article instead of crashing on an empty xpath result.
			print('skipped article without title on page {}'.format(page))
			continue

		download_src=''
		for link_xpath in link_xpaths:
			hrefs=second_url.xpath(link_xpath)
			if hrefs and hrefs[0].strip():
				download_src=hrefs[0]
				break

		if download_src:
			# Queue the file in Thunder; calling
			# download(title_name,download_src) here instead would fetch it
			# directly with requests.
			o.AddTask(download_src, title_name+'.pdf', path, "", "", -1, 0, 5)
			o.CommitTasks()
			# Pause between submissions so tasks are not pushed in a burst.
			time.sleep(3)

		print(title_name)
		print(download_src)

	print(page)

print(count)


    
   

有些发布的条目并不是论文(链接指向的不是 PDF 文件),迅雷会提示任务超时,属于正常现象,不必担心。

 

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值