下载豆瓣友邻相册

最新推荐文章于 2021-07-16 00:31:29 发布

yidangui

最新推荐文章于 2021-07-16 00:31:29 发布

阅读量2.2k

点赞数

分类专栏： Python源码　文章标签： Python

Python源码专栏收录该内容

22 篇文章 0 订阅

订阅专栏

开发环境：Win7，Python 2.7
使用了第三方的类库：BeautifulSoup（bs4）以及 wxPython（用于图形界面）
后期还需要增加多线程处理（目前下载过程在界面线程中同步执行）。

图片会存放到程序运行时当前工作目录（os.getcwd()，通常即代码所在目录）下的 DownFile 文件夹中，并按相册页面标题再分子目录。

标签： <无>

代码片段(1)

[文件] DownDoubBanImages.py ~ 6KB 下载(16)

 
       001 # coding:utf-8
 
       002 __author__ = 'zz'
 
       003  
 
       004 import os,wx
 
       005 import urllib
 
       006 import sys
 
       007 from bs4 import BeautifulSoup
 
       008  
 
       # Python 2 only: make implicit str <-> unicode conversions use UTF-8
       # instead of ASCII.  NOTE(review): reload(sys) is presumably needed because
       # sys.setdefaultencoding is removed from the module at interpreter
       # start-up -- confirm against the Python 2 documentation.
       reload(sys)
       sys.setdefaultencoding('utf8')
 
       011  
 
       012 #自定义输出图片下载信息
 
       class ImageOutMessage(object):
           """Forward download-progress messages to a text sink.

           Wraps any object that exposes ``AppendText(text)`` so that callers can
           report progress through :meth:`emit`.
           """

           def __init__(self, obj):
               # Sink that receives the messages; it must provide AppendText().
               self.Object = obj

           def emit(self, strMessage):
               """Append ``strMessage`` to the sink; empty or None messages are skipped."""
               if strMessage:
                   self.Object.AppendText(strMessage)
 
       019  
 
       class main_windows(wx.Frame):
           """Main window: downloads every picture of a Douban photo album.

           The user enters an album URL, presses the button, and the pictures of
           every album page are saved below ``<current working dir>/DownFile/<title>``.
           Progress and errors are appended to the read-only text area.

           The whole download runs synchronously inside the button handler, so the
           window does not process other events until it finishes.
           """

           # Characters replaced in the album title before it is used as a folder name.
           _INVALID_NAME_CHARS = '\\/:*?"<>|'

           def __init__(self):
               wx.Frame.__init__(self, None, -1, "Down DouBan Image", size=(450, 400))
               bkg = wx.Panel(self, -1)

               DownImageButton = wx.Button(bkg, label="DownImage")
               DownImageButton.Bind(wx.EVT_BUTTON, self.DownImage)

               self.UrlText = wx.TextCtrl(bkg)
               self.contents = wx.TextCtrl(bkg, style=wx.TE_MULTILINE)
               self.contents.SetEditable(False)

               # Top row: URL field stretches, button keeps its natural size.
               hbox = wx.BoxSizer()
               hbox.Add(self.UrlText, proportion=1, flag=wx.EXPAND)
               hbox.Add(DownImageButton, proportion=0, flag=wx.LEFT, border=5)

               vbox = wx.BoxSizer(wx.VERTICAL)
               # wx.ALL added: the original passed border=5 without naming any side.
               vbox.Add(hbox, proportion=0, flag=wx.EXPAND | wx.ALL, border=5)
               vbox.Add(self.contents, proportion=1,
                        flag=wx.EXPAND | wx.LEFT | wx.BOTTOM | wx.RIGHT, border=5)

               bkg.SetSizer(vbox)

           def _LogError(self, ex):
               """Append the standard error line for exception ``ex`` to the log area."""
               self.contents.AppendText("STOP,ERROR:%s.\n" % (ex,))

           def _StartOffset(self, strUrl):
               """Return the integer following ``start=`` in ``strUrl``, or 0 if absent."""
               marker = 'start='
               pos = strUrl.find(marker)
               if pos < 0:
                   return 0
               digits = ''
               for ch in strUrl[pos + len(marker):]:
                   if not ch.isdigit():
                       break
                   digits += ch
               return int(digits) if digits else 0

           def _SafeDirName(self, strName):
               """Replace characters listed in ``_INVALID_NAME_CHARS`` with ``_``."""
               return ''.join('_' if ch in self._INVALID_NAME_CHARS else ch
                              for ch in strName)

           def ReadHtml(self, src):
               """Fetch ``src`` and return it parsed as a BeautifulSoup document.

               Returns None (after logging the error) when the page cannot be
               fetched or parsed.
               """
               try:
                   response = urllib.urlopen(src)
                   try:
                       content = response.read()
                   finally:
                       # Always release the connection, even if read() fails.
                       response.close()
                   # Name the parser explicitly so the result does not depend on
                   # which optional parsers happen to be installed.
                   return BeautifulSoup(content, 'html.parser')
               except Exception as ex:
                   self._LogError(ex)
                   return None

           def NextPage(self, strUrl):
               """Return the URL of the page following ``strUrl``, or None on the last page.

               The next page is taken from the first ``<link rel="next">`` element
               whose ``href`` contains ``start=``.
               """
               try:
                   content = self.ReadHtml(strUrl)
                   if content is None:
                       return None
                   for line in content('link'):
                       # Check the link's own rel attribute; the original compared a
                       # find_all() result list with 0, which never filtered anything.
                       if 'next' not in (line.get('rel') or []):
                           continue
                       strHref = line.get('href') or ''
                       if 'start=' in strHref:
                           return strHref
                   return None
               except Exception as ex:
                   self._LogError(ex)
                   return None

           def PicInfo(self, src):
               """Return the full-size picture URLs found on page ``src``.

               Thumbnail addresses (``src`` attributes containing ``thumb/public``)
               are rewritten to the full-size address by replacing ``thumb`` with
               ``photo``.  Returns None when nothing is found or the page fails.
               """
               try:
                   content = self.ReadHtml(src)
                   if content is None:
                       return None
                   lstPicHref = []
                   for line in content('img'):
                       # <img> tags without a src attribute are skipped.
                       strSrc = line.get('src') or ''
                       if strSrc.find('thumb/public') > 0:
                           lstPicHref.append(strSrc.replace('thumb', 'photo'))
                   return lstPicHref or None
               except Exception as ex:
                   self._LogError(ex)
                   return None

           def WritePic(self, HtmlTitle, listPicHref, FilePath):
               """Download every URL in ``listPicHref`` into ``FilePath/HtmlTitle``.

               ``listPicHref`` may be None (treated as empty).  The target folder is
               created when missing.  Any error is logged and stops the remaining
               downloads of this page.
               """
               try:
                   # The original encoded this path to gb2312 but discarded the
                   # result, so the path has always been used unchanged.
                   strFilePath = os.path.join(FilePath, HtmlTitle)
                   if not os.path.exists(strFilePath):
                       os.mkdir(strFilePath)

                   listPicHref = listPicHref or []
                   PicLength = len(listPicHref)
                   self.contents.AppendText("%s.\n" % (
                       'Current page {} picture waiting for download...'.format(PicLength),))
                   for i, item in enumerate(listPicHref, 1):
                       # File name = last path segment of the picture URL.
                       strPicName = item.rsplit('/', 1)[-1]
                       urllib.urlretrieve(item, os.path.join(strFilePath, strPicName), None)
                       self.contents.AppendText("%s.\n" % (
                           'Download picture {}/{}:{}'.format(i, PicLength, strPicName),))
               except Exception as ex:
                   self._LogError(ex)

           def DownImage(self, event):
               """Button handler: download all pages of the album entered in the URL box."""
               try:
                   strUrl = self.UrlText.GetValue().strip()
                   if not strUrl:
                       self.contents.AppendText("URL cannot be empty")
                       return
                   # The album marker must be preceded by something (index > 0),
                   # e.g. the "http://" scheme, as in the original check.
                   if strUrl.find("www.douban.com/photos/album/") <= 0:
                       self.contents.SetValue("")
                       self.contents.AppendText("URL format is invalid, for example:\n %s" % (
                           "http://www.douban.com/photos/album/92848474/",))
                       return

                   soup = self.ReadHtml(strUrl)
                   if soup is None:
                       # ReadHtml has already logged the failure.
                       return
                   # Page title without whitespace becomes the album folder name.
                   strTitle = soup.html.head.title.string
                   strTitle = self._SafeDirName(''.join(strTitle.split()))

                   strFilePath = os.path.join(os.getcwd(), 'DownFile')
                   if not os.path.exists(strFilePath):
                       os.mkdir(strFilePath)

                   while strUrl:
                       nPrevStart = self._StartOffset(strUrl)

                       listPicHref = self.PicInfo(strUrl)
                       self.contents.AppendText("%s.\n" % (strUrl,))
                       self.WritePic(strTitle, listPicHref, strFilePath)

                       strUrl = self.NextPage(strUrl)
                       if not strUrl:
                           break
                       # Stop when the "next" link does not move forward, so a
                       # repeated or backward link cannot loop forever.
                       if nPrevStart >= self._StartOffset(strUrl):
                           break
                   self.contents.AppendText("Download complete")
               except Exception as ex:
                   self._LogError(ex)
 
       152  
 
       class App(wx.App):
           """Application object that creates and shows the main window."""

           def OnInit(self):
               """Create the main frame, show it, and register it as the top window."""
               main_frame = main_windows()
               self.frame = main_frame
               main_frame.Show(True)
               self.SetTopWindow(main_frame)
               return True
 
       159  
 
       160  
 
       # Start the GUI event loop only when the file is executed as a script.
       if __name__ == '__main__':
           app = App()
           app.MainLoop()

yidangui

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
下载豆瓣友邻相册

开发环境：WIn7，Python2.7 使用了第三方的类库：BeautifulSoup 后期还需要增加多线程处理。图片会存放到代码所在目录 DownFile下。标签：代码片段(1)[文件] DownDoubBanImages.py ~ 6KB 下载(16)view sourcepr
复制链接

扫一扫