Following the earlier post on how to crawl Hupu news titles and contents, this one adds sending the results to a QQ mailbox.
It is actually quite simple: in your QQ mailbox account settings, enable SMTP/POP3 and generate an authorization code that only you know. Third-party clients can send mail only with your email account plus this authorization code; your QQ password will not work!
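Once the code is generated, it is worth a quick sanity check before wiring it into the crawler. A minimal sketch, assuming the account and authorization code below are placeholders for your own:

#!/usr/bin/python2
# -*- coding: UTF-8 -*-
# Minimal login test for the QQ SMTP authorization code
import smtplib

smtp = smtplib.SMTP_SSL("smtp.qq.com", 465)  # 465 is the SSL SMTP port
smtp.login("your_account@qq.com", "your_auth_code")  # the authorization code, NOT the QQ password
print "login ok"
smtp.quit()

If the code is wrong, login() raises smtplib.SMTPAuthenticationError, the same family of failure the full script below guards against.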
The full script is below. It is largely the same as the earlier crawler; sending the email just calls on Python's excellent standard library. Without further ado, the code:
#!/usr/bin/python2
# -*- coding: UTF-8 -*-
import urllib2
from bs4 import BeautifulSoup
import MySQLdb
import smtplib
from email.mime.text import MIMEText
from email.header import Header
def sendMail(receivers, content):
    # Third-party SMTP service
    mail_host = "smtp.qq.com"  # SMTP server (a third-party SMTP service; or localhost if sendmail, a mail transfer agent, is installed locally)
    mail_user = "2365285094@qq.com"  # server username, i.e. your email account
    mail_pass = "xxxxxxx"  # the authorization code
    sender = '2365285094@qq.com'  # sender address
    message = MIMEText(content, 'plain', 'utf-8')
    message['From'] = Header("xxxx2号", 'utf-8')  # sender display name
    message['To'] = Header("xxxx1号", 'utf-8')  # recipient display name; actual delivery is decided by the receivers passed to sendmail()
    subject = '每日虎扑新闻爬取'  # email subject ("daily Hupu news crawl")
    message['Subject'] = Header(subject, 'utf-8')
    try:
        smtpObj = smtplib.SMTP_SSL(mail_host, 465)  # connect to the server; 465 is the SSL SMTP port
        smtpObj.login(mail_user, mail_pass)
        smtpObj.sendmail(sender, receivers, message.as_string())
        print "email success"
    except smtplib.SMTPException:
        print "Error: cannot send email"
# Return the HTML fetched from the given url
def getHtml(url):
    rs = urllib2.urlopen(url)
    return rs.read()
# Open the database connection (optional; uncomment to persist news to MySQL)
# db = MySQLdb.connect("xxxxxx", "root", "xxxx", "news_test", charset='utf8')
# Use the cursor() method to get an operation cursor
# cursor = db.cursor()

# The front page of Hupu NBA news is https://voice.hupu.com/nba/1
# the next page is 2, the one after that is 3, and so on
def getNewsListByPage(page):
    html = getHtml("https://voice.hupu.com/nba/" + str(page))
    soup = BeautifulSoup(html, "html.parser")
    news_list = soup.select(".list-hd")
    # print len(news_list)
    return news_list
pages_list = []
# Only the first three pages are crawled (about 12 in total, if I remember right),
# because the day's freshest news never sits that far back
for i in range(1, 4):
    for news in getNewsListByPage(i):
        pages_list.append(news.h4.a['href'])
# Check whether any keyword of interest appears in the given string
def isContains(interests, sql):
    for i in range(len(interests)):
        if interests[i] in sql:
            return True
    return False
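# A quick feel for isContains (example values only; any keyword that
# appears as a substring counts as a match):
#   isContains(["库里", "勇士"], "勇士加时险胜火箭")  -> True
#   isContains(["库里", "勇士"], "湖人不敌凯尔特人")  -> False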
# Request each individual news link and extract the title and body
# I'm a Warriors fan
interests = ["库里", "勇士", "杜兰特", "克莱"]  # Curry, Warriors, Durant, Klay; kept in Chinese to match Hupu titles
email_content = ''
flag = False
for i in range(len(pages_list)):
    # print "processing news item " + str(i)
    curr_html = getHtml(pages_list[i])
    curr_soup = BeautifulSoup(curr_html, "html.parser")
    # print len(curr_soup.select(".headline"))
    title = curr_soup.select(".headline")[0].get_text(strip=True).encode('utf-8')
    # print len(curr_soup.select(".artical-main-content"))
    content = curr_soup.select(".artical-main-content")[0].get_text(strip=True).encode('utf-8')
    # select_sql = "select * from hupu_nba_news where title like '%" + title + "%'"
    # # Check whether the record already exists in the database; insert it if not
    # try:
    #     cursor.execute(select_sql)
    #     results = cursor.fetchall()
    #     if len(results) > 0:
    #         continue
    # except:
    #     print "query error"
    # insert_sql = 'insert into hupu_nba_news(title,content) values("' + title + '","' + content + '");'
    # # print insert_sql
    # if isContains(interests, insert_sql):
    #     try:
    #         # Execute the SQL statement
    #         cursor.execute(insert_sql)
    #         # Commit to the database
    #         db.commit()
    #     except:
    #         # Roll back in case there is any error
    #         db.rollback()
    if isContains(interests, title):
        flag = True
        email_content += '标题:' + title + '\n\n' + '内容:' + content + '\n\n\n'
# Send the email only if at least one article of interest was found
if flag:
    sendMail(['970014590@qq.com'], email_content)
print "done"
# db.close()
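Two quick usage notes. First, since smtplib's sendmail() takes a list of recipients, fanning the digest out to several mailboxes is just a longer list; a sketch with placeholder addresses:

# Placeholder addresses for illustration only
sendMail(['first@example.com', 'second@example.com'], email_content)

Second, to make the "daily" in the subject line true, schedule the script with cron on Linux or Task Scheduler on Windows instead of running it by hand.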
That's it; go easy on me. If anything is unclear, feel free to leave a comment below.
Warriors, champions!