[Project]联盟助手-python

以下4个文件分别为:get.py、makenew.py、pro.py、out.py
#!/usr/bin/env python
from  sgmllib  import  SGMLParser
import  urllib 

class  URLLister(SGMLParser):
    
def  reset(self):                             
        SGMLParser.reset(self)
        self.urls 
=  []

    
def  start_a(self, attrs):                      
        href 
=  [v  for  k, v  in  attrs  if  k == ' href ' ]  
        
if  href:   
            self.urls.extend(href)
#!/usr/bin/env python
# encoding=utf-8

import  urllib,get  

# ---start: correct the input if it lacks the protocol name---------------
# Forum front page to scan; no surrounding whitespace, because this value
# is reused as the prefix of the URLs built further down.
d = 'http://www.bitunion.org'
def panduan_http(d):
    """Return *d* as an absolute URL, adding ``http://`` when no scheme is present.

    Surrounding whitespace is stripped first.  Input that already starts
    with ``http://`` or ``https://`` (case-insensitive) is returned as-is;
    anything else gets ``http://`` prepended.

    Side effect: the result is also stored in the module-level global
    ``req``, which other module-level code may read.
    """
    global req
    d = d.strip()
    # Accept https too, so an https URL is not turned into "http://https://...".
    if d.lower().startswith(('http://', 'https://')):
        req = d
    else:
        req = 'http://' + d
    return req
# ---end------------------------------------------------------------------

# Download the front page and collect every link it contains.
base_url = panduan_http(d)
sock = urllib.urlopen(base_url)
try:
    page = sock.read()
finally:
    # Release the connection even when the read fails.
    sock.close()

sword = get.URLLister()
sword.feed(page)
sword.close()

# sword.urls is the list of ALL hrefs on the page; keep only the relative
# "redirect..." links and turn them into absolute URLs.
b = base_url + '/'
bbscan = [b + url for url in sword.urls if url.startswith('redirect')]
bbsold = bbscan

class nail():
    """Holder that exposes the collected redirect URLs to importing modules."""

    # Bound to the module-level list ``bbsold`` when the class body executes.
    kee = bbsold
#!/usr/bin/env python
# encoding=utf-8

from  sgmllib  import  SGMLParser
import  makenew,re

# ----start: this module separates out the CH-strings---------------------

class  Parse(SGMLParser):
    
def  reset(self):
        self.found_title 
=  0
        SGMLParser.reset(self)
    
def  start_title(self, attrs):
        self.found_title 
+=   1
    
def  end_title(self):
        self.found_title 
-=   1
    
def  handle_data(self, text):
        
if  self.found_title  >  0:
            aa
= re.findall( ' -(?P<data>.*)- '  , text)
            
for  a  in  aa:
                
print   " %s " %  a
# -----end----------------------------------------------------------------



# Redirect URLs collected by makenew when it was imported.
guai = makenew.nail()
bbss = guai.kee

# Pull the thread id out of "...tid=<id>&goto..." and build the URL of the
# thread's first page from it.
_TID_PATTERN = re.compile(r'tid=(?P<data>.*)&goto')

newurls = []
for hard in bbss:
    for j in _TID_PATTERN.findall(hard):
        newurl = 'http://www.bitunion.org/thread-' + j + '-1-1.html'
        newurls.append(newurl)
link = newurls

#!/usr/bin/env python
# encoding=utf-8

import  pro,urllib


# For every thread URL: print the URL, download the page, let pro.Parse
# print the title fragment, then print a separator line.
# NOTE(review): the padding in ' - ' may be a paste artifact; kept unchanged
# because it only affects how the separator looks.
print(' - ' * 70)
for newurl in pro.link:
    print(newurl)

    sock = urllib.urlopen(newurl)
    try:
        html = sock.read()
    finally:
        # Release the connection even when the read fails.
        sock.close()

    # The pages are decoded as GBK; undecodable bytes are replaced so that
    # one bad byte does not abort the whole run.
    html = html.decode('gbk', 'replace')

    p = pro.Parse()
    p.feed(html)

    print(' - ' * 70)

    
    
                    
# completed by freefis at 7:19 AM, Dec 22, 07
# version-0.12B
 
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值