Scraping Hupu NBA news titles and content with Python 2 and emailing them to a QQ mailbox

Following my earlier post on scraping Hupu news titles and content, this one shows how to send the results to a QQ mailbox.

It's actually quite simple: go into your QQ Mail account settings, enable SMTP/POP3, and generate an authorization code that only you know. Third-party clients can only send mail using this authorization code together with your mailbox account; your QQ password will not work.
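Before dropping this into the scraper, it's worth checking the authorization code on its own. Here's a minimal sketch, where your_account@qq.com and your_auth_code are placeholders for your own account and the code generated in the settings page:

#!/usr/bin/python2
# -*- coding: UTF-8 -*-
import smtplib

# connect to QQ Mail's SMTP server over SSL (port 465)
smtp = smtplib.SMTP_SSL("smtp.qq.com", 465)
# log in with the mailbox account and the AUTHORIZATION CODE, not the QQ password
smtp.login("your_account@qq.com", "your_auth_code")
print "authorization code accepted"
smtp.quit()

If login raises smtplib.SMTPAuthenticationError, the usual culprit is having pasted the account password instead of the authorization code.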

Below is the code. It's much the same as before; sending the email is just a matter of calling Python's powerful standard library. I'll leave you with the code:

#!/usr/bin/python2
# -*- coding: UTF-8 -*-

import urllib2
from bs4 import BeautifulSoup
# import MySQLdb  # only needed if you re-enable the commented-out database code below
import smtplib
from email.mime.text import MIMEText
from email.header import Header

def sendMail(receivers, content):
    # third-party SMTP service
    mail_host = "smtp.qq.com"  # SMTP server (or "localhost" if sendmail, a mail transfer agent, is installed locally)
    mail_user = "2365285094@qq.com"  # server username, i.e. your mailbox account
    mail_pass = "xxxxxxx"  # the authorization code (not your QQ password)

    sender = '2365285094@qq.com'  # sender address

    message = MIMEText(content, 'plain', 'utf-8')
    message['From'] = Header("xxxx2号", 'utf-8')  # sender display name
    message['To'] = Header("xxxx1号", 'utf-8')  # recipient display name

    subject = 'Daily Hupu news crawl'  # email subject
    message['Subject'] = Header(subject, 'utf-8')

    try:
        smtpObj = smtplib.SMTP_SSL(mail_host, 465)  # connect over SSL; 465 is the SMTP-over-SSL port
        smtpObj.login(mail_user, mail_pass)
        smtpObj.sendmail(sender, receivers, message.as_string())
        print "email success"
    except smtplib.SMTPException:
        print "Error: cannot send email"

# return the HTML fetched from the given url
def getHtml(url):
    rs = urllib2.urlopen(url)
    return rs.read()

# Open a database connection
# db = MySQLdb.connect("xxxxxx", "root", "xxxx", "news_test", charset='utf8' )
# Get a cursor with the cursor() method
# cursor = db.cursor()
# The first page of Hupu NBA news is at https://voice.hupu.com/nba/1
# the next page is 2, the one after that 3, and so on
def getNewsListByPage(page):
    html = getHtml("https://voice.hupu.com/nba/"+str(page))
    soup = BeautifulSoup(html,"html.parser")
    news_list = soup.select(".list-hd")
    #print len(news_list)
    return news_list
pages_list = []

# Only crawl the first three pages (there are about 12 in total, if I remember right);
# the day's latest news never sits that far back
for i in range(1, 4):
    for news in getNewsListByPage(i):
        pages_list.append(news.h4.a['href'])

# check whether any of the keywords appears in the given text
def isContains(interests, text):
    for i in range(len(interests)):
        if interests[i] in text:
            return True
    return False

# Request each individual news link and pull out the title and body
# I'm a Warriors fan
interests = ["库里","勇士","杜兰特","克莱"]  # Curry, Warriors, Durant, Klay
email_content=''
flag = False
for i in range(len(pages_list)):
    #print "执行=======第"+str(i)+"条新闻"
    curr_html = getHtml(pages_list[i])
    curr_soup = BeautifulSoup(curr_html,"html.parser")
    #print len(curr_soup.select(".headline"))
    title = curr_soup.select(".headline")[0].get_text(strip=True).encode('utf-8')
    #print len(curr_soup.select(".artical-main-content"))
    content = curr_soup.select(".artical-main-content")[0].get_text(strip=True).encode('utf-8')
    # select_sql = "select * from hupu_nba_news where title like '%"+title+"%'"
    # # skip this item if the record already exists in the database; otherwise insert it
    # try:
    #     cursor.execute(select_sql)
    #     results = cursor.fetchall()
    #     if len(results) > 0:
    #         continue
    # except:
    #     print "query error"
    # insert_sql = 'insert into hupu_nba_news(title,content) values("'+title+'","'+content+'");'
    # #print sql
    # if isContains(interests,insert_sql):
    #     try:
    #     # execute the SQL statement
    #         cursor.execute(insert_sql)
    #     # commit to the database
    #         db.commit()
    #     except:
    #     # roll back in case of any error
    #         db.rollback()
    if isContains(interests, title):
        flag = True
        email_content += 'Title: ' + title + '\n\n' + 'Content: ' + content + '\n\n\n'
# send the email
if flag:
    sendMail(['970014590@qq.com'],email_content)
print "done"
# db.close()
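By the way, if you later re-enable the commented-out MySQL deduplication, parameterized queries are safer than string concatenation: the driver does the quoting, so a title containing quote characters won't break the statement. A sketch under the same assumptions as the code above (a news_test database with a hupu_nba_news(title, content) table; host and password are placeholders):

import MySQLdb

db = MySQLdb.connect("localhost", "root", "your_password", "news_test", charset='utf8')
cursor = db.cursor()

def saveIfNew(title, content):
    # look the title up first and skip the insert if it is already stored
    cursor.execute("select 1 from hupu_nba_news where title = %s", (title,))
    if cursor.fetchone():
        return False
    try:
        # the driver escapes the %s parameters for us
        cursor.execute("insert into hupu_nba_news(title, content) values (%s, %s)", (title, content))
        db.commit()
        return True
    except MySQLdb.Error:
        db.rollback()
        return False

saveIfNew is a hypothetical helper, not part of the original script; you'd call it right after extracting title and content in the loop.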

That's it. Feel free to leave a comment below if anything is unclear.

Warriors for the championship!

