百度贴吧挖坟实时监控 python版

有人挖坟后会立刻删掉自己的回复,让吧务不知道是谁把坟贴挖上来。
代码如下

'''
# -*- coding: utf-8 -*-
Real-time monitor: as soon as a suspected necro-poster is found, it is shown on screen. In addition, every 20 minutes a txt file is saved to drive D containing the list of currently captured suspect IDs.
'''
import urllib
import re
import time
# Timestamp of the last suspect-list dump; the 20-minute check further down
# in the loop is measured from this value.
starttime = time.time()
while 1:
    # Placeholder: replace with the URL of the forum listing page to monitor.
    html = '要监控的贴吧页面'
    tiebafile = urllib.urlopen(html)
    # Decode the page as UTF-8 and re-encode it as GBK; every slice offset
    # below is therefore a byte offset into the GBK text.
    neirong = tiebafile.read().decode('utf-8').encode('gbk')
    url123 = re.finditer('/p/',neirong)
    urllist = []
    timelist = []
    for i in url123:
        # Build a thread URL from the (up to) 10 characters following "/p/";
        # a trailing double quote picked up by the fixed-width slice is
        # trimmed off.
        tiebaurl = 'http://tieba.baidu.com/p/'+neirong[i.end():i.end()+10]
        if tiebaurl[-1] ==r'"':
            tiebaurl = tiebaurl[:-1]
        urllist.append(tiebaurl)
    # Drop the entry at index 50 (an advertisement link) and the first three
    # entries (pinned threads; adjust the count to the number of pinned
    # threads).  Deleting by position rather than list.remove(value) makes
    # sure a duplicate URL earlier in the list is not removed by mistake.
    del urllist[50]
    del urllist[0:3]
    renming = re.finditer('最后回复人:'.decode('utf-8').encode('gbk'),neirong)
    namelist = []
    for i in renming:
        # Keep bytes 12..30 counted from the start of each label match, plus
        # a newline.  The fixed offsets presumably skip the label itself and
        # cover the replier's name - TODO confirm against the live markup.
        namelist.append(neirong[i.start()+12:i.start()+30]+'\n')
    for i in range(48):
        # Fetch each of the first 48 threads and keep the 10 characters that
        # follow the first 'j_reply_data">' marker; time_text() later parses
        # them as a date.  If the marker is missing, find() returns -1 and the
        # slice is arbitrary page text.
        sonurl = urllib.urlopen(urllist[i]).read()
        marker = sonurl.find('j_reply_data">')
        timelist.append(sonurl[marker+14:marker+24])
    def time_text(timestr):
        """Return -1 when the date in *timestr* is more than 90 days old.

        *timestr* is expected to start with ``YEAR-MONTH-DAY``; parts after
        the third dash-separated field are ignored.  The age is computed as an
        exact calendar-day difference from today's local date (the previous
        365-day-year / 30-day-month estimate could be off by several days
        around the 90-day threshold).

        Returns None for dates that are 90 days old or newer, and also for
        text that cannot be parsed as a valid date, so callers can keep
        testing ``== -1`` without one bad page aborting the monitor.
        """
        import datetime  # local import: the file header only imports time

        try:
            year, month, day = [int(part) for part in timestr.split('-')[:3]]
            posted = datetime.date(year, month, day)
        except ValueError:
            # Too few fields, non-numeric fields, or an impossible date.
            return None
        if (datetime.date.today() - posted).days > 90:
            return -1
        return None
    xianyiren = []
    for i in range(48):
        if time_text(timelist[i]) == -1:
            print r'这个人有挖坟嫌疑:'.decode('utf-8').encode('gbk'),namelist[i]
            if namelist[i] not in xianyiren:
                xianyiren.append(namelist[i])
    print r'############################################'
    righttime = (time.localtime()[0].__str__() + r'年'+time.localtime()[1].__str__() + r'月' + time.localtime()[2].__str__() + r'日' +time.localtime()[3].__str__() +r'时' + time.localtime()[4].__str__() + r'分' + time.localtime()[5].__str__() + r'秒').decode('utf-8').encode('gbk')
    print r'现在时间是:'.decode('utf-8').encode('gbk'),righttime
    thistime = time.time()
    if (int(thistime) - int(starttime)) >=1200:
        f1 = open('d:\\'+time.localtime()[:6].__str__().replace(',','-')+'.txt','w')
        for i in xianyiren:
            f1.write(i + '\n')    
        f1.close()
        starttime = thistime'
    # NOTE(review): from here on the loop body repeats the fetch/parse/report
    # steps that already appear earlier in the loop; this looks like a
    # duplicated paste - confirm and keep only one copy.
    tiebafile = urllib.urlopen(html)
    # Decode the page as UTF-8 and re-encode it as GBK; every slice offset
    # below is therefore a byte offset into the GBK text.
    neirong = tiebafile.read().decode('utf-8').encode('gbk')
    url123 = re.finditer('/p/',neirong)
    urllist = []
    timelist = []
    for i in url123:
        # Build a thread URL from the (up to) 10 characters following "/p/";
        # a trailing double quote picked up by the fixed-width slice is
        # trimmed off.
        tiebaurl = 'http://tieba.baidu.com/p/'+neirong[i.end():i.end()+10]
        if tiebaurl[-1] ==r'"':
            tiebaurl = tiebaurl[:-1]
        urllist.append(tiebaurl)
    # Deleting by position rather than list.remove(value) makes sure a
    # duplicate URL earlier in the list is not removed by mistake.
    del urllist[50]     # this removes the advertisement link
    del urllist[0:3]    # excludes the pinned threads; delete as many from the front as there are pinned threads
    renming = re.finditer('最后回复人:'.decode('utf-8').encode('gbk'),neirong)
    namelist = []
    for i in renming:
        # Keep bytes 12..30 counted from the start of each label match, plus
        # a newline.  The fixed offsets presumably skip the label itself and
        # cover the replier's name - TODO confirm against the live markup.
        namelist.append(neirong[i.start()+12:i.start()+30]+'\n')
    for i in range(48):
        # Fetch each of the first 48 threads and keep the 10 characters that
        # follow the first 'j_reply_data">' marker; time_text() later parses
        # them as a date.  If the marker is missing, find() returns -1 and the
        # slice is arbitrary page text.
        sonurl = urllib.urlopen(urllist[i]).read()
        marker = sonurl.find('j_reply_data">')
        timelist.append(sonurl[marker+14:marker+24])
    def time_text(timestr):
        """Return -1 when the date in *timestr* is more than 90 days old.

        *timestr* is expected to start with ``YEAR-MONTH-DAY``; parts after
        the third dash-separated field are ignored.  The age is computed as an
        exact calendar-day difference from today's local date (the previous
        365-day-year / 30-day-month estimate could be off by several days
        around the 90-day threshold).

        Returns None for dates that are 90 days old or newer, and also for
        text that cannot be parsed as a valid date, so callers can keep
        testing ``== -1`` without one bad page aborting the monitor.
        """
        import datetime  # local import: the file header only imports time

        try:
            year, month, day = [int(part) for part in timestr.split('-')[:3]]
            posted = datetime.date(year, month, day)
        except ValueError:
            # Too few fields, non-numeric fields, or an impossible date.
            return None
        if (datetime.date.today() - posted).days > 90:
            return -1
        return None
    xianyiren = []
    for i in range(48):
        if time_text(timelist[i]) == -1:
            print r'这个人有挖坟嫌疑:'.decode('utf-8').encode('gbk'),namelist[i]
            if namelist[i] not in xianyiren:
                xianyiren.append(namelist[i])
    print r'############################################'
    righttime = (time.localtime()[0].__str__() + r'年'+time.localtime()[1].__str__() + r'月' + time.localtime()[2].__str__() + r'日' +time.localtime()[3].__str__() +r'时' + time.localtime()[4].__str__() + r'分' + time.localtime()[5].__str__() + r'秒').decode('utf-8').encode('gbk')
    print r'现在时间是:'.decode('utf-8').encode('gbk'),righttime
    thistime = time.time()
    if (int(thistime) - int(starttime)) >=1200:
        f1 = open('d:\\'+time.localtime()[:6].__str__().replace(',','-')+'.txt','w')
        for i in xianyiren:
            f1.write(i + '\n')    
        f1.close()
        starttime = thistime
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值