爬虫:监控360论坛,有新帖子时自动发邮件

代码粗糙,凑合看,24小时运行没问题,写在这备忘

啰嗦扯蛋版本

#*-coding:utf-8-*-
import urllib2
import re
import smtplib
from email.mime.text import MIMEText

# Sender mailbox credentials and the notification recipient.
# NOTE(review): these are placeholders hard-coded in the script; fill them in
# before running.

_user = "xxxxxxxxx@qq.com"
_pwd = "xxxxxxxxxxxx"
_to = "xxxxxxxxxxx@360.cn"

# First page of the forum board that is polled for new threads.
FORUM_URL = "http://bbs.360.cn/forum-140-1.html"
REPORT_PATH = 'output.html'
# Pause between polls so the loop does not hit the forum back-to-back.
POLL_INTERVAL_SECONDS = 5
FETCH_TIMEOUT_SECONDS = 30

# Thread anchors on the board page look like:
#   <a href="http://bbs.360.cn/thread-14658847-1-1.html" target="_blank" class="s xst">title</a>
# Group 1 is the thread URL, group 2 the title. The id is matched with \d+ and
# the title lazily, so ids of any length work and two links on one line are
# reported as two separate matches.
THREAD_LINK_RE = re.compile(
    r'(http://bbs\.360\.cn/thread-\d+-1-1\.html)"\s+target="_blank"\s+'
    r'class="s xst"[^>]*>\s*(.*?)\s*</a>')


def fetch_page(url):
    """Download *url* and return the response body as read from the socket.

    Network errors are not handled here; they propagate to the caller.
    """
    response = urllib2.urlopen(urllib2.Request(url), timeout=FETCH_TIMEOUT_SECONDS)
    try:
        return response.read()
    finally:
        response.close()


def extract_posts(html):
    """Return a list of ``(url, title)`` tuples for every thread link in *html*.

    Returns an empty list when nothing matches.
    """
    return THREAD_LINK_RE.findall(html)


def render_report(posts):
    """Build one complete HTML document with a table row per ``(url, title)``."""
    rows = [
        "<tr><td><a href=\"%s\" target='_blank'> %s </a>  ..... %s </td></tr>"
        % (url, url, title)
        for url, title in posts
    ]
    return "<html><body><table>" + "".join(rows) + "</table></body></html>"


def write_report(posts):
    """Rewrite the report file from scratch.

    The whole document is regenerated each time so that new rows always end up
    inside the table instead of being appended after the closing tags.
    """
    with open(REPORT_PATH, 'w') as report:
        report.write(render_report(posts))


def build_message(url, title, sender, recipient):
    """Create the notification mail: subject is the thread URL, body the title.

    NOTE(review): the title is passed through exactly as fetched, without any
    decoding; confirm the page charset if mail clients show garbled titles.
    """
    msg = MIMEText(title)
    msg["Subject"] = url
    msg["From"] = sender
    msg["To"] = recipient
    return msg


def send_notification(msg):
    """Send *msg* through smtp.qq.com over SSL; return True on success.

    Failures are printed and reported through the return value rather than
    raised, so one failed mail does not stop the monitor.
    """
    server = None
    try:
        server = smtplib.SMTP_SSL("smtp.qq.com", 465)
        server.login(_user, _pwd)
        server.sendmail(_user, _to, msg.as_string())
        server.quit()
    except (smtplib.SMTPException, IOError) as e:
        # IOError covers socket-level failures, which are not SMTPException
        # subclasses on Python 2.
        print("Send Email Falied,%s" % e)
        return False
    finally:
        if server is not None:
            # Releases the socket when login/sendmail failed before quit().
            server.close()
    print("Send Email Success!")
    return True


def main():
    """Record the threads currently listed, then poll forever for new ones.

    The first fetch is allowed to raise: starting from an empty baseline would
    mail every thread on the page once the forum became reachable.
    """
    import time  # local import: not among this script's top-of-file imports

    posts = extract_posts(fetch_page(FORUM_URL))
    for url, title in posts:
        print("%s %s" % (url, title))
    write_report(posts)
    # Threads are identified by their link, so membership tests are O(1).
    seen = set(url for url, _title in posts)

    while True:
        time.sleep(POLL_INTERVAL_SECONDS)
        try:
            page = fetch_page(FORUM_URL)
        except IOError as e:
            # urllib2.URLError and socket errors derive from IOError on
            # Python 2; skip this round and try again on the next poll.
            print("Fetch failed, will retry: %s" % e)
            continue

        fresh = []
        for url, title in extract_posts(page):
            if url not in seen:
                # Marked as seen before mailing, so a failed send is not
                # retried on every subsequent poll.
                seen.add(url)
                fresh.append((url, title))
        if not fresh:
            continue

        posts.extend(fresh)
        write_report(posts)
        for url, title in fresh:
            send_notification(build_message(url, title, _user, _to))


if __name__ == "__main__":
    main()

下面是短小精悍版

#*-coding:utf-8-*-
import urllib2
import re
import smtplib
import time
from email.mime.text import MIMEText

# Sender mailbox credentials.
# NOTE(review): these are placeholders hard-coded in the script; fill them in
# before running.
_user = "XXXXXXXXX@qq.com"
_pwd = "XXXXXXXXXXXXXXX"

POLL_INTERVAL_SECONDS = 5
FETCH_TIMEOUT_SECONDS = 30

# Thread anchors on the board page look like:
#   <a href="http://bbs.360.cn/thread-14658847-1-1.html" target="_blank" class="s xst">title</a>
# Group 1 is the thread URL, group 2 the title. Capturing both replaces the
# fixed-offset slicing, which only worked for 8-digit thread ids.
THREAD_LINK_RE = re.compile(
    r'(http://bbs\.360\.cn/thread-\d+-1-1\.html)"\s+target="_blank"\s+'
    r'class="s xst"[^>]*>\s*(.*?)\s*</a>')


def fetch_page(url):
    """Download *url* and return the response body as read from the socket.

    Network errors are not handled here; they propagate to the caller.
    """
    response = urllib2.urlopen(urllib2.Request(url), timeout=FETCH_TIMEOUT_SECONDS)
    try:
        return response.read()
    finally:
        response.close()


def extract_posts(html):
    """Return a list of ``(url, title)`` tuples for every thread link in *html*.

    Returns an empty list when nothing matches.
    """
    return THREAD_LINK_RE.findall(html)


def build_message(url, title, sender, recipient):
    """Create the notification mail: subject is the thread URL, body the title.

    NOTE(review): the title is passed through exactly as fetched, without any
    decoding; confirm the page charset if mail clients show garbled titles.
    """
    msg = MIMEText(title)
    msg["Subject"] = url
    msg["From"] = sender
    msg["To"] = recipient
    return msg


def send_notification(msg, recipient):
    """Send *msg* to *recipient* through smtp.qq.com over SSL.

    Returns True on success. Failures are printed and reported through the
    return value rather than raised, so one failed mail does not stop the
    monitor.
    """
    server = None
    try:
        server = smtplib.SMTP_SSL("smtp.qq.com", 465)
        server.login(_user, _pwd)
        server.sendmail(_user, recipient, msg.as_string())
        server.quit()
    except (smtplib.SMTPException, IOError) as e:
        # IOError covers socket-level failures, which are not SMTPException
        # subclasses on Python 2.
        print("Send Email Falied,%s" % e)
        return False
    finally:
        if server is not None:
            # Releases the socket when login/sendmail failed before quit().
            server.close()
    print('find a new article')
    print("Send Email Success!")
    return True


def main():
    """Ask for the board URL and recipient, then poll forever for new threads.

    The first fetch is allowed to raise: starting from an empty baseline would
    mail every thread on the page once the forum became reachable.
    """
    purposurl = raw_input("Input need focus url such as :http://bbs.360.cn/forum-140-1.html :").strip()
    toemailaddress = raw_input("Input rec-email address such as : XXXXXXXXX@360.cn :").strip()
    print("the system is running !!! please do not close the CMD window!!!")

    # Threads are identified by their link, so membership tests are O(1).
    seen = set(url for url, _title in extract_posts(fetch_page(purposurl)))

    while True:
        time.sleep(POLL_INTERVAL_SECONDS)
        try:
            page = fetch_page(purposurl)
        except IOError as e:
            # urllib2.URLError and socket errors derive from IOError on
            # Python 2; skip this round and try again on the next poll.
            print("Fetch failed, will retry: %s" % e)
            continue

        for url, title in extract_posts(page):
            if url in seen:
                continue
            # Marked as seen before mailing, so a failed send is not retried
            # on every subsequent poll.
            seen.add(url)
            send_notification(
                build_message(url, title, _user, toemailaddress),
                toemailaddress)


if __name__ == "__main__":
    main()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值