本文将使用 Python 爬取百度新闻搜索指数排名前 50 的新闻,并通过服务器运行,每天定时发送到指定邮箱。
先上代码:
# -*- coding:utf-8 -*-
import email
import os
import re
import smtplib
import ssl

import requests
# Sender credentials. The EMAIL_ADDRESS / EMAIL_PASSWORD environment variables
# take precedence so the secret can stay out of the source file; the literals
# are the original placeholders and are used only when a variable is unset.
EMAIL_ADDRESS = os.environ.get('EMAIL_ADDRESS', '****@hotmail.com')
EMAIL_PASSWORD = os.environ.get('EMAIL_PASSWORD', '***')

# Open the SMTP submission connection. The timeout keeps an unattended,
# scheduled run from blocking forever when the mail server is unreachable.
s = smtplib.SMTP(host='smtp.office365.com', port=587, timeout=30)
# Upgrade to TLS before authenticating. An explicit default context enables
# certificate and hostname verification instead of relying on whatever
# context smtplib picks implicitly.
s.starttls(context=ssl.create_default_context())
s.login(EMAIL_ADDRESS, EMAIL_PASSWORD)
print('Connected~')
# Baidu ranking page that is scraped for the news list.
url = 'http://top.baidu.com/buzz?b=341&c=513&fr=topbuzz_b1'
# Bound the request so a stalled connection cannot hang the scheduled run,
# and fail loudly on an HTTP error instead of parsing an error page.
r = requests.get(url, timeout=30)
r.raise_for_status()
# The raw body is decoded as GBK. Undecodable bytes become U+FFFD so a single
# stray byte does not abort the whole run.
txt = r.content.decode('GBK', errors='replace')

# Entry titles: the text between the end of an href_top="..." attribute and
# the next '<'.
pt = re.compile(r'href_top=".*?">(.*?)<')
title = pt.findall(txt)

# Entry links: the href value that sits directly before an href_top attribute.
pt1 = re.compile(r'href="(.*?)" href_top')
urls = pt1.findall(txt)

# Text between 'icon-' + any four characters + '">' and the closing '</span'.
# NOTE(review): presumably the search-index figure shown per entry - confirm
# against the live page markup.
pt2 = re.compile(r'icon-....">(.*?)</span')
rise = pt2.findall(txt)
# Number of entries to report: at most 50, but never more than were actually
# scraped, so indexing title/rise/urls by position cannot run past the end of
# the shortest list.
i = min(50, len(title), len(rise), len(urls))
# Starts empty. NOTE(review): presumably accumulates the message text further
# down - confirm against the rest of the script.
mss = ''
for x in range(i):
ma=(str(x+1),title[x],rise[x],urls[x])