代码片段(1)
[文件] DownDoubBanImages.py ~ 6KB 下载(16)
001 | # coding:utf-8 |
002 | __author__ = 'zz' |
003 |
004 | import os,wx |
005 | import urllib |
006 | import sys |
007 | from bs4 import BeautifulSoup |
008 |
# Python 2 only: the interpreter's startup code deletes
# sys.setdefaultencoding, so the module is reloaded to get it back and the
# implicit str <-> unicode codec is switched to UTF-8 for the whole process.
reload(sys)
sys.setdefaultencoding("utf8")
011 |
# Custom sink for image-download progress messages.
class ImageOutMessage(object):
    """Forward non-empty messages to an object exposing ``AppendText``.

    The wrapped object is stored as ``self.Object``; it only needs an
    ``AppendText(text)`` method (a ``wx.TextCtrl`` provides one).
    """

    def __init__(self, obj):
        # Target that receives the text; kept under the original public name.
        self.Object = obj

    def emit(self, strMessage):
        """Append *strMessage* to the target; empty or None is ignored."""
        if not strMessage:
            return
        self.Object.AppendText(strMessage)
019 |
class main_windows(wx.Frame):
    """Main window: download every photo of a Douban album to local disk.

    The user types an album URL, presses the button, and progress/error
    lines are appended to a read-only multi-line text control
    (``self.contents``).
    """

    # Substring an album URL must contain, after its scheme, to be accepted.
    _ALBUM_URL_MARKER = "www.douban.com/photos/album/"
    # Characters replaced in folder names because Windows rejects them.
    _UNSAFE_NAME_CHARS = '\\/:*?"<>|'

    def __init__(self):
        wx.Frame.__init__(self, None, -1, "Down DouBan Image", size=(450, 400))
        bkg = wx.Panel(self, -1)

        DownImageButton = wx.Button(bkg, label="DownImage")
        DownImageButton.Bind(wx.EVT_BUTTON, self.DownImage)

        self.UrlText = wx.TextCtrl(bkg)
        self.contents = wx.TextCtrl(bkg, style=wx.TE_MULTILINE)
        self.contents.SetEditable(False)

        # Top row: URL entry stretches, button keeps its natural width.
        hbox = wx.BoxSizer()
        hbox.Add(self.UrlText, proportion=1, flag=wx.EXPAND)
        hbox.Add(DownImageButton, proportion=0, flag=wx.LEFT, border=5)

        vbox = wx.BoxSizer(wx.VERTICAL)
        # A sizer border only applies to the sides named in ``flag``; the
        # original passed border=5 without any side flag, so it was ignored.
        vbox.Add(hbox, proportion=0, flag=wx.EXPAND | wx.ALL, border=5)
        vbox.Add(self.contents, proportion=1,
                 flag=wx.EXPAND | wx.LEFT | wx.BOTTOM | wx.RIGHT, border=5)

        bkg.SetSizer(vbox)

    def _report_error(self, ex):
        """Append the standard error line for *ex* to the log control."""
        self.contents.AppendText("STOP,ERROR:%s.\n" % (ex,))

    @staticmethod
    def _start_offset(url):
        """Return the integer after ``start=`` in *url*, or 0 if absent.

        Only the leading digits are read, so further query parameters
        (``?start=18&sortby=...``) do not break the conversion.
        """
        tail = url.partition('start=')[2]
        digits = ''
        for ch in tail:
            if not ch.isdigit():
                break
            digits += ch
        return int(digits) if digits else 0

    @classmethod
    def _safe_dir_name(cls, name):
        """Replace characters that are illegal in Windows file names."""
        return ''.join('_' if ch in cls._UNSAFE_NAME_CHARS else ch
                       for ch in name)

    def ReadHtml(self, src):
        """Fetch *src* and return it parsed as a BeautifulSoup document.

        On any failure the error is written to the log control and None
        is returned.
        """
        try:
            response = urllib.urlopen(src)
            try:
                content = response.read()
            finally:
                # Always release the connection, even if read() fails.
                response.close()
            return BeautifulSoup(content)
        except Exception as ex:
            self._report_error(ex)
            return None

    def NextPage(self, strUrl):
        """Return the URL of the page following *strUrl*, or None.

        The address is taken from the first ``<link rel="next">`` element
        whose href carries a ``start=`` offset. None is returned on the
        last page or when the page cannot be read.
        """
        try:
            content = self.ReadHtml(strUrl)
            if content is None:
                return None
            for line in content('link'):
                # bs4 exposes ``rel`` as a list of tokens; ``in`` also works
                # if a parser hands back a plain string.
                if 'next' not in (line.get('rel') or []):
                    continue
                strHref = line.get('href') or ''
                if 'start=' in strHref:
                    return strHref
            return None
        except Exception as ex:
            self._report_error(ex)
            return None

    def PicInfo(self, src):
        """Return the full-size image URLs found on page *src*.

        Thumbnail addresses (those containing ``thumb/public``) are
        rewritten to their full-size form. Returns None when no image is
        found or the page cannot be read.
        """
        try:
            content = self.ReadHtml(src)
            if content is None:
                return None
            lstPicHref = []
            for line in content('img'):
                # <img> tags without a src attribute are skipped.
                strPicHref = line.get('src') or ''
                # Only thumbnail addresses are of interest.
                if 'thumb/public' in strPicHref:
                    # Turn the thumbnail address into the original image one.
                    lstPicHref.append(strPicHref.replace('thumb', 'photo'))
            return lstPicHref or None
        except Exception as ex:
            self._report_error(ex)
            return None

    def WritePic(self, HtmlTitle, listPicHref, FilePath):
        """Download every URL in *listPicHref* into ``FilePath/HtmlTitle``.

        *listPicHref* may be None or empty, in which case only the folder
        is created and a zero count is logged. Each file is named after
        the last path segment of its URL. Any error is written to the log
        control and stops the remaining downloads of this page.
        """
        try:
            listPicHref = listPicHref or []
            # Folder for the current album, named after the page title.
            strFilePath = os.path.join(FilePath, self._safe_dir_name(HtmlTitle))
            if not os.path.isdir(strFilePath):
                os.makedirs(strFilePath)

            PicLength = len(listPicHref)
            self.contents.AppendText("%s.\n" % ('Current page {} picture waiting for download...'.format(PicLength)))
            for i, item in enumerate(listPicHref, 1):
                # Last URL segment, without any query string, is the name.
                strPicName = item.rsplit('/', 1)[-1].split('?', 1)[0]
                # Save the picture under the album folder.
                urllib.urlretrieve(item, os.path.join(strFilePath, strPicName))
                self.contents.AppendText("%s.\n" % ('Download picture {}/{}:{}'.format(i, PicLength, strPicName)))
        except Exception as ex:
            self._report_error(ex)

    def DownImage(self, event):
        """Button handler: download all pages of the album in the URL box.

        NOTE(review): the whole download runs inside this event handler,
        so the window does not repaint until it finishes.
        """
        try:
            strUrl = self.UrlText.GetValue().strip()
            if not strUrl:
                self.contents.AppendText("URL cannot be empty")
                return
            # The marker must come after a scheme prefix, hence "> 0".
            if strUrl.find(self._ALBUM_URL_MARKER) <= 0:
                self.contents.SetValue("")
                self.contents.AppendText("URL format is invalid, for example:\n %s" % ("http://www.douban.com/photos/album/92848474/"))
                return

            # The page title names the download folder.
            soup = self.ReadHtml(strUrl)
            if soup is None:
                # ReadHtml has already logged the reason.
                return
            title_tag = soup.title
            if title_tag is None or not title_tag.string:
                self._report_error("page title not found")
                return
            strTitle = ''.join(title_tag.string.split())

            # Make sure the root download folder exists.
            strFilePath = os.path.join(os.getcwd(), 'DownFile')
            if not os.path.isdir(strFilePath):
                os.makedirs(strFilePath)

            while strUrl:
                nPrevStart = self._start_offset(strUrl)

                listPicHref = self.PicInfo(strUrl)
                self.contents.AppendText("%s.\n" % (strUrl))
                self.WritePic(strTitle, listPicHref, strFilePath)

                strUrl = self.NextPage(strUrl)
                # Stop when there is no next page, or when the offset does
                # not advance (guards against revisiting pages forever).
                if not strUrl or nPrevStart >= self._start_offset(strUrl):
                    break
            self.contents.AppendText("Download complete")
        except Exception as ex:
            self._report_error(ex)
152 |
class App(wx.App):
    """Application object that creates and shows the main window."""

    def OnInit(self):
        """Build the main frame, show it, and make it the top window."""
        frame = main_windows()
        self.frame = frame
        frame.Show(True)
        self.SetTopWindow(frame)
        # Returning True tells wx that start-up succeeded.
        return True
159 |
160 |
# Script entry point: start the GUI only when run directly, not on import.
if __name__ == "__main__":
    app = App()
    # Blocks until the main window is closed.
    app.MainLoop()