代码粗糙,凑合看,24小时运行没问题,写在这备忘
啰嗦扯蛋版本
#*-coding:utf-8-*-
import urllib2
import re
import smtplib
from email.mime.text import MIMEText
import time  # needed for the poll delay; the imports above do not provide it

# Mailbox account/password used for sending, and the address that receives alerts.
_user = "xxxxxxxxx@qq.com"
_pwd = "xxxxxxxxxxxx"
_to = "xxxxxxxxxxx@360.cn"

# First page of the forum board that is being watched.
FORUM_URL = "http://bbs.360.cn/forum-140-1.html"
# Report file; every thread that is seen gets one table row appended here.
OUTPUT_PATH = 'output.html'
# Seconds to wait between two polls so the forum is not hit in a tight loop.
POLL_INTERVAL = 5

# A thread link on the board page looks like:
#   <a href="http://bbs.360.cn/thread-14658847-1-1.html" target="_blank" class="s xst">title</a>
# Group 1 captures the thread URL, group 2 the title text.  The dots are
# escaped and the title is matched lazily so that several links on the same
# line are returned as separate matches.
THREAD_LINK = re.compile(
    r'(http://bbs\.360\.cn/thread-\d+-1-1\.html)"\s+target="_blank"\s+class="s xst"'
    r'[^>]*>(.*?)\s*</a>'
)


def parse_threads(html):
    """Return a list of (url, title) tuples for the thread links in *html*.

    The tuples are in page order; an empty list is returned when nothing
    matches.
    """
    return THREAD_LINK.findall(html)


def fetch_threads(url):
    """Download *url* and return the (url, title) tuples found on the page.

    Errors raised by ``urllib2.urlopen`` are not handled here; the caller
    decides whether a failed download is fatal.
    """
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        return parse_threads(response.read())
    finally:
        response.close()


def find_new(threads, seen):
    """Return the threads whose URL is not yet in *seen*.

    *seen* is a set of URLs and is updated in place, so a thread listed twice
    on the same page is reported only once.
    """
    fresh = []
    for url, title in threads:
        if url not in seen:
            seen.add(url)
            fresh.append((url, title))
    return fresh


def format_row(url, title):
    """Return the HTML table row written to the report for one thread."""
    return "<tr><td><a href = %s target='_blank'> %s </a> ..... %s </td></tr>" % (url, url, title)


def write_initial_report(path, threads):
    """Append a complete HTML table holding *threads* to the file at *path*."""
    with open(path, 'a') as report:
        report.write("<html><body><table>")
        for url, title in threads:
            report.write(format_row(url, title))
        report.write("</table></body></html>")


def append_row(path, url, title):
    """Append a single thread row to the file at *path*.

    NOTE: the row lands after the closing tags written by
    write_initial_report(); the file is append-only, as in the original script.
    """
    with open(path, 'a') as report:
        report.write(format_row(url, title))


def send_alert(url, title):
    """Mail one new thread to ``_to``: the URL is the subject, the title the body.

    Failures are printed and swallowed so that one bad delivery does not stop
    the monitor.
    """
    msg = MIMEText(title)
    msg["Subject"] = url
    msg["From"] = _user
    msg["To"] = _to
    try:
        server = smtplib.SMTP_SSL("smtp.qq.com", 465)
        try:
            server.login(_user, _pwd)
            server.sendmail(_user, _to, msg.as_string())
            server.quit()
        finally:
            # Release the socket even when login/sendmail raised.
            server.close()
        print("Send Email Success!")
    except (smtplib.SMTPException, IOError) as e:
        # IOError also covers socket-level failures while connecting.
        print("Send Email Falied,%s" % e)


def main():
    """Record the threads currently on the board, then poll forever for new ones.

    Every thread that was not seen before is appended to the report file and
    mailed to ``_to``.
    """
    known = fetch_threads(FORUM_URL)
    for url, title in known:
        print("%s %s" % (url, title))
    write_initial_report(OUTPUT_PATH, known)

    seen = set(url for url, _title in known)
    while True:
        time.sleep(POLL_INTERVAL)
        try:
            current = fetch_threads(FORUM_URL)
        except IOError as err:
            # urllib2.URLError is an IOError subclass; skip this round and retry.
            print("Fetch failed, will retry: %s" % err)
            continue
        for url, title in find_new(current, seen):
            append_row(OUTPUT_PATH, url, title)
            send_alert(url, title)


if __name__ == "__main__":
    main()
下面是短小精悍版
#*-coding:utf-8-*-
import urllib2
import re
import smtplib
import time
from email.mime.text import MIMEText
# Sender mailbox account and password.
_user = "XXXXXXXXX@qq.com"
_pwd = "XXXXXXXXXXXXXXX"

# Start-up banner.  The original triple-quoted literal contained stray quote
# characters from a botched implicit string concatenation.
BANNER = "the system is running !!! please do not close the CMD window!!!"

# Seconds to wait between two polls of the watched page.
POLL_INTERVAL = 5

# A thread link on the board page looks like:
#   <a href="http://bbs.360.cn/thread-14658847-1-1.html" target="_blank" class="s xst">title</a>
# Group 1 captures the thread URL, group 2 the title text.  Capturing them
# replaces fixed slice offsets, which were only right for 8-digit thread ids.
THREAD_LINK = re.compile(
    r'(http://bbs\.360\.cn/thread-\d+-1-1\.html)"\s+target="_blank"\s+class="s xst"'
    r'[^>]*>(.*?)\s*</a>'
)


def parse_threads(html):
    """Return a list of (url, title) tuples for the thread links in *html*.

    The tuples are in page order; an empty list is returned when nothing
    matches.
    """
    return THREAD_LINK.findall(html)


def fetch_threads(url):
    """Download *url* and return the (url, title) tuples found on the page.

    Errors raised by ``urllib2.urlopen`` are not handled here; the caller
    decides whether a failed download is fatal.
    """
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        return parse_threads(response.read())
    finally:
        response.close()


def find_new(threads, seen):
    """Return the threads whose URL is not yet in *seen*.

    *seen* is a set of URLs and is updated in place, so a thread listed twice
    on the same page is reported only once.
    """
    fresh = []
    for url, title in threads:
        if url not in seen:
            seen.add(url)
            fresh.append((url, title))
    return fresh


def send_alert(recipient, url, title):
    """Mail one new thread to *recipient*: the URL is the subject, the title the body.

    Failures are printed and swallowed so that one bad delivery does not stop
    the monitor.
    """
    msg = MIMEText(title)
    msg["Subject"] = url
    msg["From"] = _user
    msg["To"] = recipient
    try:
        server = smtplib.SMTP_SSL("smtp.qq.com", 465)
        try:
            server.login(_user, _pwd)
            server.sendmail(_user, recipient, msg.as_string())
            server.quit()
        finally:
            # Release the socket even when login/sendmail raised.
            server.close()
        print('find a new article')
        print("Send Email Success!")
    except (smtplib.SMTPException, IOError) as e:
        # IOError also covers socket-level failures while connecting.
        print("Send Email Falied,%s" % e)


def main():
    """Ask for the page to watch and the recipient, then poll forever.

    The threads present at start-up are treated as already known; every
    thread that appears later is mailed to the recipient once.
    """
    purposurl = raw_input("Input need focus url such as :http://bbs.360.cn/forum-140-1.html :")
    toemailaddress = raw_input("Input rec-email address such as : XXXXXXXXX@360.cn :")
    print(BANNER)

    seen = set(url for url, _title in fetch_threads(purposurl))
    while True:
        time.sleep(POLL_INTERVAL)
        try:
            current = fetch_threads(purposurl)
        except IOError as err:
            # urllib2.URLError is an IOError subclass; skip this round and retry.
            print("Fetch failed, will retry: %s" % err)
            continue
        for url, title in find_new(current, seen):
            send_alert(toemailaddress, url, title)


if __name__ == "__main__":
    main()