# The four files below are, in order: get.py, makenew.py, pro.py and out.py
#!/usr/bin/env python
from sgmllib import SGMLParser
import urllib
class URLLister(SGMLParser):
    """SGML parser that collects the href target of every <a> tag into self.urls."""

    def reset(self):
        # Reset inherited parser state and start with an empty URL list.
        SGMLParser.reset(self)
        self.urls = []

    def start_a(self, attrs):
        # attrs is a list of (name, value) pairs; SGMLParser lower-cases
        # attribute names, so the key to compare against is exactly 'href'.
        # BUG FIX: the original compared against ' href ' (space-padded),
        # which can never match, so no URL was ever collected.
        href = [v for k, v in attrs if k == 'href']
        if href:
            self.urls.extend(href)
from sgmllib import SGMLParser
import urllib
class URLLister(SGMLParser):
    """SGML parser that collects the href target of every <a> tag into self.urls."""

    def reset(self):
        # Reset inherited parser state and start with an empty URL list.
        SGMLParser.reset(self)
        self.urls = []

    def start_a(self, attrs):
        # attrs is a list of (name, value) pairs; SGMLParser lower-cases
        # attribute names, so the key to compare against is exactly 'href'.
        # BUG FIX: the original compared against ' href ' (space-padded),
        # which can never match, so no URL was ever collected.
        href = [v for k, v in attrs if k == 'href']
        if href:
            self.urls.extend(href)
#!/usr/bin/env python
# encoding=utf-8
import urllib,get
# ---start:to correct the input, if lack of thr Protocal Name-------------
d = ' http://www.bitunion.org '
def panduan_http(d):
global req
req = ''
if d.startswith( ' http:// ' ):
req = d
else :
req = ' http:// ' + d
return req
# ---end------------------------------------------------------------------
# Fetch the page and parse out every <a href=...> it contains.
sock = urllib.urlopen(panduan_http(d))
sword = get.URLLister()
sword.feed(sock.read())
sword.close()

bbscan = []
# sword.urls is the list of ALL URLs found in the page; keep only the
# relative 'redirect' links and turn each into an absolute URL.
for url in sword.urls:
    # BUG FIX: the original tested startswith(' redirect ') and joined with
    # ' / ' (both space-padded), which never matched / built broken URLs.
    if url.startswith('redirect'):
        bbsinfo = req + '/' + url
        bbscan.append(bbsinfo)

bbsold = bbscan

class nail():
    # Holds the collected board URLs for import by pro.py.
    kee = bbsold
# encoding=utf-8
import urllib,get
# ---start:to correct the input, if lack of thr Protocal Name-------------
d = ' http://www.bitunion.org '
def panduan_http(d):
global req
req = ''
if d.startswith( ' http:// ' ):
req = d
else :
req = ' http:// ' + d
return req
# ---end------------------------------------------------------------------
# Fetch the page and parse out every <a href=...> it contains.
sock = urllib.urlopen(panduan_http(d))
sword = get.URLLister()
sword.feed(sock.read())
sword.close()

bbscan = []
# sword.urls is the list of ALL URLs found in the page; keep only the
# relative 'redirect' links and turn each into an absolute URL.
for url in sword.urls:
    # BUG FIX: the original tested startswith(' redirect ') and joined with
    # ' / ' (both space-padded), which never matched / built broken URLs.
    if url.startswith('redirect'):
        bbsinfo = req + '/' + url
        bbscan.append(bbsinfo)

bbsold = bbscan

class nail():
    # Holds the collected board URLs for import by pro.py.
    kee = bbsold
#!/usr/bin/env python
# encoding=utf-8
from sgmllib import SGMLParser
import makenew,re
# ----start:the module is to seperate thr CH-strings----------------------
class Parse(SGMLParser):
    """Print the text captured between dashes inside a page's <title>.

    Thread pages presumably have titles like 'Board-Thread Title-Site';
    handle_data() prints the part between the first and last dash.
    """

    def reset(self):
        # Depth counter: > 0 while we are inside a <title> element.
        self.found_title = 0
        SGMLParser.reset(self)

    def start_title(self, attrs):
        self.found_title += 1

    def end_title(self):
        self.found_title -= 1

    def handle_data(self, text):
        if self.found_title > 0:
            # BUG FIX: the original pattern ' -(?P<data>.*)- ' was
            # space-padded by extraction and would rarely match; use a raw
            # string without the padding.
            aa = re.findall(r'-(?P<data>.*)-', text)
            for a in aa:
                print("%s" % a)
# -----end----------------------------------------------------------------
# Pull the board URLs collected by makenew.py.
guai = makenew.nail()
bbss = guai.kee

newurls = []
for hard in bbss:
    # Extract the thread id from URLs like '...tid=12345&goto...'.
    # BUG FIX: the original regex literal was space-padded
    # (' tid=(?P<data>.*)&goto ') and never matched; also dropped the
    # redundant identity list comprehension around re.findall().
    ids = re.findall(r'tid=(?P<data>.*)&goto', hard)
    for j in ids:
        # Rebuild the canonical thread (ZhuTiTie) URL from the id.
        newurl = 'http://www.bitunion.org/thread-' + j + '-1-1.html'
        newurls.append(newurl)

link = newurls
# encoding=utf-8
from sgmllib import SGMLParser
import makenew,re
# ----start:the module is to seperate thr CH-strings----------------------
class Parse(SGMLParser):
    """Print the text captured between dashes inside a page's <title>.

    Thread pages presumably have titles like 'Board-Thread Title-Site';
    handle_data() prints the part between the first and last dash.
    """

    def reset(self):
        # Depth counter: > 0 while we are inside a <title> element.
        self.found_title = 0
        SGMLParser.reset(self)

    def start_title(self, attrs):
        self.found_title += 1

    def end_title(self):
        self.found_title -= 1

    def handle_data(self, text):
        if self.found_title > 0:
            # BUG FIX: the original pattern ' -(?P<data>.*)- ' was
            # space-padded by extraction and would rarely match; use a raw
            # string without the padding.
            aa = re.findall(r'-(?P<data>.*)-', text)
            for a in aa:
                print("%s" % a)
# -----end----------------------------------------------------------------
# Pull the board URLs collected by makenew.py.
guai = makenew.nail()
bbss = guai.kee

newurls = []
for hard in bbss:
    # Extract the thread id from URLs like '...tid=12345&goto...'.
    # BUG FIX: the original regex literal was space-padded
    # (' tid=(?P<data>.*)&goto ') and never matched; also dropped the
    # redundant identity list comprehension around re.findall().
    ids = re.findall(r'tid=(?P<data>.*)&goto', hard)
    for j in ids:
        # Rebuild the canonical thread (ZhuTiTie) URL from the id.
        newurl = 'http://www.bitunion.org/thread-' + j + '-1-1.html'
        newurls.append(newurl)

link = newurls
#!/usr/bin/env python
# encoding=utf-8
import pro,urllib
# Fetch every thread URL produced by pro.py, decode it and print the titles.
print('-' * 70)
for newurl in pro.link:
    print(newurl)
    sock = urllib.urlopen(newurl)
    html = sock.read()
    sock.close()
    # BUG FIX: the codec name was written as " gbk " (space-padded), which
    # raises LookupError; the forum pages are GBK-encoded.
    html = unicode(html, "gbk")
    p = pro.Parse()
    p.feed(html)
print('-' * 70)
# completed by freefis at 7:19 AM, Dec 22, '07
# version 0.12B
# encoding=utf-8
import pro,urllib
# Fetch every thread URL produced by pro.py, decode it and print the titles.
print('-' * 70)
for newurl in pro.link:
    print(newurl)
    sock = urllib.urlopen(newurl)
    html = sock.read()
    sock.close()
    # BUG FIX: the codec name was written as " gbk " (space-padded), which
    # raises LookupError; the forum pages are GBK-encoded.
    html = unicode(html, "gbk")
    p = pro.Parse()
    p.feed(html)
print('-' * 70)
# completed by freefis at 7:19 AM, Dec 22, '07
# version 0.12B