贴吧抓取指定内容(以 Steam Key 为例)

Steam吧里经常有土豪发key,所以就有了这个东西……

#coding:utf-8
from __future__ import print_function
from time import ctime,sleep
import sys
import re
import urllib2
import threading

# Base URL that every request in this script is built on.
userMainUrl = 'http://tieba.baidu.com/'

# Shared mutable state, touched by gKey and by the td_getKey worker threads.
runing = list()   # thread paths ('p/<digits>') queued by gKey.searchInPage
IDs = list()      # thread paths td_getKey recorded after parsing a counter > 1
keys = list()     # key strings td_getKey has already printed
threads = list()  # worker thread slots filled in by gKey.__init__

class gKey:
    """Poller for one Tieba forum that feeds thread paths to the scanning workers.

    Creating an instance starts ten threads running ``td_getKey``.
    ``searchInPage`` fills the shared ``runing`` queue those threads consume.
    """

    # Class-level defaults; ``name`` is replaced per instance in __init__.
    name = ''
    respHtml = ''  # not read by any method of this class

    i = 1  # not read by any method of this class

    def __init__(self, n):
        """Store the forum name and start the worker threads.

        Args:
            n: forum name, sent as the ``kw`` query parameter by searchInPage.
        """
        self.name = n
        for _ in range(10):
            worker = threading.Thread(target=td_getKey, args=())
            # Append the Thread itself instead of a placeholder that is then
            # overwritten by index; indexing breaks if `threads` is not empty.
            # NOTE: workers are non-daemon threads, so they keep the process
            # alive after the main thread stops.
            threads.append(worker)
            worker.start()

    def searchInPage(self, p):
        """Fetch one listing page of the forum and queue its threads for scanning.

        Thread paths already recorded in ``IDs`` or still waiting in ``runing``
        are not queued again.

        Args:
            p: listing page index; the request is sent with ``pn = 50 * p``.

        Errors raised by ``urllib2.urlopen`` / ``read`` are not caught here.
        """
        req = urllib2.Request(userMainUrl + 'f?kw=' + self.name + '&&pn=' + str(50 * p))
        resp = urllib2.urlopen(req)
        try:
            respHtml = resp.read()
        finally:
            resp.close()
        for m in re.findall(r'<a href="/p/\d+', respHtml):
            # Drop the 10-character prefix '<a href="/' so that the value has
            # the same 'p/<digits>' form that is stored in IDs and runing.
            pid = m[10:]
            if pid not in IDs and pid not in runing:
                runing.append(pid)


def _attrValueAfter(html, marker, start):
    """Return the text between the first ``marker`` at/after ``start`` and the next '"'.

    Returns '' when the marker or the closing quote cannot be found.
    """
    left = html.find(marker, start)
    if left == -1:
        return ''
    left += len(marker)
    right = html.find('"', left)
    if right == -1:
        return ''
    return html[left:right]


def td_getKey():
    """Worker loop: fetch queued thread pages and print any new keys found.

    Runs forever. Each iteration takes one thread path from ``runing``,
    downloads ``userMainUrl + path`` and, when the page contains a
    ``class="red">`` counter, scans it for 5x5 and 3x5 dash-separated
    upper-case/digit codes. Codes not yet in ``keys`` are recorded and printed
    together with the ``username="`` and ``id="post_content_`` values that
    follow the code in the page.
    """
    # Loop-invariant patterns, compiled once per worker.
    counterPattern = re.compile(r'class="red">\d+')
    keyPatterns = (
        re.compile(r'(?<!-)[A-Z0-9]{5}-[A-Z0-9]{5}-[A-Z0-9]{5}-[A-Z0-9]{5}-[A-Z0-9]{5}(?!\-)'),
        re.compile(r'(?<!-)[A-Z0-9]{5}-[A-Z0-9]{5}-[A-Z0-9]{5}(?!\-)'),
    )
    while True:
        # A single pop() replaces the check-then-index-then-remove sequence,
        # which could raise IndexError when another worker emptied the queue
        # in between.
        try:
            pid = runing.pop(0)
        except IndexError:
            sleep(0.1)  # queue is empty: wait instead of spinning at full CPU
            continue
        if not pid:
            continue

        try:
            resp = urllib2.urlopen(urllib2.Request(userMainUrl + pid))
            try:
                respHtml = resp.read()
            finally:
                resp.close()
        except IOError:
            # urllib2.URLError derives from IOError; skip this thread rather
            # than letting one failed request end the worker.
            continue

        inPage = counterPattern.search(respHtml)
        if inPage is None:
            continue
        # The matched text starts with the 12 characters 'class="red">'.
        pageNum = int(inPage.group()[12:])
        if pageNum > 1 and pid not in IDs:
            IDs.append(pid)

        keyMatch = []
        for pattern in keyPatterns:
            keyMatch.extend(pattern.findall(respHtml))

        for km in keyMatch:
            if km in keys:
                continue
            keys.append(km)
            keyPos = respHtml.find(km)
            giver = _attrValueAfter(respHtml, 'username="', keyPos)
            hosted = _attrValueAfter(respHtml, 'id="post_content_', keyPos)
            print("%s\r\n"%(ctime()))
            print ("Get:%s \r\nFrom:%s At:%s"%(km,giver,userMainUrl+pid+"?pid="+hosted+"#"+hosted))

# Seconds to wait between two listing polls, so the forum is not requested
# in a tight loop.
POLL_INTERVAL_SECONDS = 1

# Constructing gKey starts the worker threads; the loop below keeps feeding
# them with the threads listed on the first page of the 'steam' forum.
getKey = gKey('steam')
n = 1
while True:
    print("Number %d Searching..." % n, end="\r")
    sys.stdout.flush()
    n = n + 1
    try:
        getKey.searchInPage(0)
    except IOError as err:
        # A transient network failure should not end the poller while the
        # worker threads stay alive; report it and try again on the next round.
        print("Search failed: %s" % err, file=sys.stderr)
    sleep(POLL_INTERVAL_SECONDS)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值