Following the earlier post on how to crawl Hupu news titles and contents, this one adds sending the results to a QQ mailbox.
It is actually quite simple: in your QQ mailbox account settings, enable SMTP/POP3 and generate an authorization code that only you know. Third-party clients can send mail only with your email account plus this authorization code; your QQ password will not work!
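Once the code is generated, it is worth a quick sanity check before wiring it into the crawler. A minimal sketch, assuming the account and authorization code below are placeholders for your own:

#!/usr/bin/python2
# -*- coding: UTF-8 -*-
# Minimal login test for the QQ SMTP authorization code
import smtplib

smtp = smtplib.SMTP_SSL("smtp.qq.com", 465)  # 465 is the SSL SMTP port
smtp.login("your_account@qq.com", "your_auth_code")  # the authorization code, NOT the QQ password
print "login ok"
smtp.quit()

If the code is wrong, login() raises smtplib.SMTPAuthenticationError, the same family of failure the full script below guards against.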
The full script is below. It is largely the same as the earlier crawler; sending the email just calls on Python's excellent standard library. Without further ado, the code:
#!/usr/bin/python2
# -*- coding: UTF-8 -*-
import urllib2
from bs4 import BeautifulSoup
import MySQLdb
import smtplib
from email.mime.text import MIMEText
from email.header import Header
def sendMail(receivers, content):
    # Third-party SMTP service
    mail_host = "smtp.qq.com"  # SMTP server (a third-party SMTP service; or localhost if sendmail, a mail transfer agent, is installed locally)
    mail_user = "2365285094@qq.com"  # server username, i.e. your email account
    mail_pass = "xxxxxxx"  # the authorization code
    sender = '2365285094@qq.com'  # sender address
    message = MIMEText(content, 'plain', 'utf-8')
    message['From'] = Header("xxxx2号", 'utf-8')  # sender display name
    message['To'] = Header("xxxx1号", 'utf-8')  # recipient display name; actual delivery is decided by the receivers passed to sendmail()
    subject = '每日虎扑新闻爬取'  # email subject ("daily Hupu news crawl")
    message['Subject'] = Header(subject, 'utf-8')
    try:
        smtpObj = smtplib.SMTP_SSL(mail_host, 465)  # connect to the server; 465 is the SSL SMTP port
        smtpObj.login(mail_user, mail_pass)
        smtpObj.sendmail(sender, receivers, message.as_string())
        print "email success"
    except smtplib.SMTPException:
        print "Error: cannot send email"
# Return the HTML fetched from the given url
def getHtml(url):
    rs = urllib2.urlopen(url)
    return rs.read()
# Open the database connection (optional; uncomment to persist news to MySQL)
# db = MySQLdb.connect("xxxxxx", "root", "xxxx", "news_test", charset='utf8')
# Use the cursor() method to get an operation cursor
# cursor = db.cursor()

# The front page of Hupu NBA news is https://voice.hupu.com/nba/1
# the next page is 2, the one after that is 3, and so on
def getNewsListByPage(page):
    html = getHtml("https://voice.hupu.com/nba/" + str(page))
    soup = BeautifulSoup(html, "html.parser")
    news_list = soup.select(".list-hd")
    # print len(news_list)
    return news_list
pages_list = []
# Only the first three pages are crawled (about 12 in total, if I remember right),
# because the day's freshest news never sits that far back
for i in range(1, 4):
    for news in getNewsListByPage(i):
        pages_list.append(news.h4.a['href'])
# Check whether any keyword of interest appears in the given string
def isContains(interests, sql):
    for i in range(len(interests)):
        if interests[i] in sql:
            return True
    return False
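# A quick feel for isContains (example values only; any keyword that
# appears as a substring counts as a match):
#   isContains(["库里", "勇士"], "勇士加时险胜火箭")  -> True
#   isContains(["库里", "勇士"], "湖人不敌凯尔特人")  -> False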
# Request each individual news link and extract the title and body
# I'm a Warriors fan
interests = ["库里", "勇士", "杜兰特", "克莱"]  # Curry, Warriors, Durant, Klay; kept in Chinese to match Hupu titles
email_content = ''
flag = False
for i in range(len(pages_list)):
    # print "processing news item " + str(i)
    curr_html = getHtml(pages_list[i])
    curr_soup = BeautifulSoup(curr_html, "html.parser")
    # print len(curr_soup.select(".headline"))
    title = curr_soup.select(".headline")[0].get_text(strip=True).encode('utf-8')
    # print len(curr_soup.select(".artical-main-content"))
    content = curr_soup.select(".artical-main-content")[0].get_text(strip=True).encode('utf-8')
    # select_sql = "select * from hupu_nba_news where title like '%" + title + "%'"
    # # Check whether the record already exists in the database; insert it if not
    # try:
    #     cursor.execute(select_sql)
    #     results = cursor.fetchall()
    #     if len(results) > 0:
    #         continue
    # except:
    #     print "query error"
    # insert_sql = 'insert into hupu_nba_news(title,content) values("' + title + '","' + content + '");'
    # # print insert_sql
    # if isContains(interests, insert_sql):
    #     try:
    #         # Execute the SQL statement
    #         cursor.execute(insert_sql)
    #         # Commit to the database
    #         db.commit()
    #     except:
    #         # Roll back in case there is any error
    #         db.rollback()
    if isContains(interests, title):
        flag = True
        email_content += '标题:' + title + '\n\n' + '内容:' + content + '\n\n\n'
# Send the email only if at least one article of interest was found
if flag:
    sendMail(['970014590@qq.com'], email_content)
print "done"
# db.close()
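Two quick usage notes. First, since smtplib's sendmail() takes a list of recipients, fanning the digest out to several mailboxes is just a longer list; a sketch with placeholder addresses:

# Placeholder addresses for illustration only
sendMail(['first@example.com', 'second@example.com'], email_content)

Second, to make the "daily" in the subject line true, schedule the script with cron on Linux or Task Scheduler on Windows instead of running it by hand.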
That's it; go easy on me. If anything is unclear, feel free to leave a comment below.
Warriors, champions!