"""Scrape the mms.163.com picture catalogue and download every listed image.

The module was written for Python 2 (``sgmllib`` / ``urllib``).  The import
fallbacks below also let it load on Python 3, where ``sgmllib`` no longer
exists and ``html.parser.HTMLParser`` is used as the base parser instead.
"""

import os
import urllib
from threading import Thread

try:  # Python 2
    from sgmllib import SGMLParser
    # No longer used by this module; kept so code that imported these names
    # from here keeps working.
    from string import find, replace, rjust
    from urllib import urlopen as _urlopen
    from urllib import urlretrieve as _urlretrieve
except ImportError:  # Python 3
    from html.parser import HTMLParser as SGMLParser
    from urllib.request import urlopen as _urlopen
    from urllib.request import urlretrieve as _urlretrieve

__author__ = "Chen Peng (peng.ch@hotmail.com)"
__version__ = "$Revision: 1.0 $"
__date__ = "$Date: 2006/03/03 $"
__copyright__ = "Copyright (c) 2006 Chen Peng"
__license__ = "Python"

__all__ = ["Gif_163_Parser"]

# Directory the pictures are written to, and the file failures are appended to.
_SAVE_DIR = './files/'
_ERROR_LOG = 'error.log'

# Catalogue page address; %s is replaced by the page number.
_PAGE_URL = ('http://mms.163.com/new_web/cm_lv2_pic.jsp'
             '?catID=&ord=dDate&page=%s&type=1&key=')

# Label that precedes the picture title inside the showPic() caption, spelled
# as the raw bytes C3 FB D7 D6 followed by ": " so it does not depend on the
# encoding of this source file.
# NOTE(review): presumably the GBK bytes of the Chinese word for "name";
# confirm against a live page.
_NAME_LABEL = '\xc3\xfb\xd7\xd6: '


def _ensure_save_dir():
    """Create the download directory if it does not exist yet."""
    if os.path.isdir(_SAVE_DIR):
        return
    try:
        os.makedirs(_SAVE_DIR)
    except OSError:
        # Either another download thread created it first, or it cannot be
        # created at all; in the latter case each download fails and is
        # logged individually, as it was before this helper existed.
        pass


class PDownloadThread(Thread):
    """Download every URL of a ``{name: url}`` dict into ``./files/``.

    Each picture is stored as ``./files/<name>.<ext>`` where ``<ext>`` is the
    text after the last dot of its URL.  Failed downloads are appended to
    ``error.log`` as ``<page number> <url> <name>``, one record per line.
    """

    def __init__(self, DictList, i):
        """Remember the links to fetch and the page number they came from.

        DictList -- dict mapping picture name to download URL.
        i        -- catalogue page number, only used in the error log.
        """
        Thread.__init__(self)
        self.DictList = DictList
        self.pageno = str(i)

    def run(self):
        """Fetch all pictures; log the ones that cannot be retrieved."""
        _ensure_save_dir()
        # Iterate over a snapshot so a caller that still mutates the dict
        # cannot break the loop.
        for name, url in list(self.DictList.items()):
            extension = url.rsplit('.', 1)[-1]
            # NOTE(review): `name` comes straight from the remote page and is
            # used unchanged as a file name; consider sanitising it.
            target = _SAVE_DIR + name + '.' + extension
            print('Download' + url + '......')
            try:
                _urlretrieve(url, target)
            except Exception:
                # Best effort: one broken picture must not stop the others,
                # but Ctrl-C / SystemExit are no longer swallowed.
                self._log_failure(name, url)
            else:
                # Only announce the file once it has really been written.
                print('Save to file ' + name)

    def _log_failure(self, name, url):
        """Append one failed download to the error log."""
        logfile = open(_ERROR_LOG, 'a')
        try:
            logfile.write(self.pageno + ' ' + url + ' ' + name + '\n')
        finally:
            logfile.close()


class Gif_163_Parser(SGMLParser):
    """Collect and download the pictures of the 163.com MMS catalogue.

    Task: download the "163" colour pictures.

    How it works: the catalogue pages
        http://mms.163.com/new_web/cm_lv2_pic.jsp?catID=&ord=dDate&page=2&type=1&key=
    are fetched from page 1 to page 415 (6637 pictures in total).  Each
    picture appears in a script call carrying a relative path such as
    "/fgwx/hhsj/1_060302175613_186/128x128.gif", e.g.

        <script>showPic('22930','1',
            '/fgwx/hhsj/1_060302175613_186/128x128.gif', '1',
            '<id label>:22930 <name label>: <title> <popularity label>:100');
        </script>

    (the labels are Chinese on the real page).  The picture itself is then
    downloaded from

        http://mmsimg.163.com/new_web/loaditem.jsp/type=1/path=/fgwx/llfj/1_060302175612_995/176x176.gif

    Attributes:
        headURL -- prefix put in front of every relative picture path.
        SubURL  -- unused list, kept for compatibility.
        Links   -- dict mapping picture title to download URL for the page
                   currently being parsed.
    """

    def reset(self):
        """Reset the parser state and start with an empty link table."""
        SGMLParser.reset(self)
        self.headURL = 'http://mmsimg.163.com/new_web/loaditem.jsp/type=1/path='
        self.SubURL = []
        self.Links = {}

    def start_script(self, attrs):
        """Ignore the opening <script> tag; only its text matters."""
        pass

    def end_script(self):
        """Ignore the closing </script> tag."""
        pass

    def handle_data(self, text):
        """Record the picture described by a ``showPic(...)`` call.

        The title is the second '/n'-separated segment of the text with the
        name label removed; the relative path is the third comma-separated
        field with its quotes removed.  Text that mentions ``showPic`` but
        does not have that shape (for instance the function's own
        definition) is skipped instead of raising IndexError.
        """
        if 'showPic' not in text:
            return
        caption_parts = text.split('/n')
        call_fields = text.split(',')
        if len(caption_parts) < 2 or len(call_fields) < 3:
            return
        title = caption_parts[1].replace(_NAME_LABEL, '')
        path = call_fields[2].replace("'", '')
        self.Links[title] = self.headURL + path

    def Execute(self, first_page=1, last_page=415):
        """Parse the catalogue pages and start one download thread per page.

        first_page -- first page number to fetch (default 1).
        last_page  -- last page number to fetch, inclusive (default 415).

        Pages that cannot be fetched are reported and skipped.
        """
        _ensure_save_dir()
        for i in range(first_page, last_page + 1):
            # Start every page with an empty table (the original referenced
            # `clear` without calling it, so links piled up across pages).
            self.Links.clear()
            try:
                usock = _urlopen(_PAGE_URL % str(i))
                try:
                    page = usock.read()
                finally:
                    usock.close()
            except IOError:
                print('Skip page ' + str(i) + ': cannot be fetched')
                continue
            if not isinstance(page, str):
                # Python 3 returns bytes.  latin-1 maps every byte to one
                # code point, so the byte-oriented matching in handle_data()
                # behaves as it does on Python 2.
                page = page.decode('latin-1')
            self.feed(page)
            # Flush buffered text so the last link is seen before the
            # snapshot below is taken.
            self.close()
            # Hand the thread its own copy: self.Links is cleared and refilled
            # for the next page while the thread is still downloading.
            worker = PDownloadThread(dict(self.Links), i)
            worker.start()


if __name__ == '__main__':
    testtask = Gif_163_Parser()
    testtask.Execute()