最近 ChinaJoy 非常火爆,所以我写了一个抓取 showgirl 图片的小程序,和大家分享一下。
import urllib.request
import re
import os
import shutil
def getreport(peramount, persize, totalsize):
    """Print download progress; suitable as a ``urlretrieve`` reporthook.

    Args:
        peramount: Number of blocks transferred so far (0 on the first call).
        persize: Size of one block in bytes.
        totalsize: Total size of the download in bytes. ``urlretrieve``
            passes -1 when the server does not report a size.
    """
    if not peramount:
        print("connection opened")
        return
    amount_read = peramount * persize
    if totalsize <= 0:
        # Without a usable total a percentage is meaningless (and a total
        # of 0 would divide by zero), so report only the byte count.
        print('%d / unknown' % amount_read)
        return
    # The final block is usually partial, so blocks * block size overshoots
    # the real total; clamp so the progress never exceeds 100%.
    amount_read = min(amount_read, totalsize)
    print('%d / %d, %.2f%%' % (amount_read, totalsize, amount_read * 100.0 / totalsize))
def getHtml(url):
    """Fetch *url* and return the raw response body as ``bytes``.

    Args:
        url: Address to open with ``urllib.request.urlopen``.

    Returns:
        The undecoded response body.
    """
    # The context manager closes the response (and its connection) even if
    # read() raises; the original left it open.
    with urllib.request.urlopen(url) as page:
        return page.read()
def getImg(html):
    """Download every ``.jpg`` referenced by a ``src`` attribute in *html*.

    The ``./photoes`` directory is removed and recreated on every call, and
    the images are saved there as ``1.jpg``, ``2.jpg``, ... in the order
    they appear in the page.

    Args:
        html: Raw page content as ``bytes``.
    """
    # [^"]* keeps each match inside a single attribute value; the previous
    # greedy ``.*`` merged several images on one line into one bogus URL.
    # A raw bytes literal avoids the invalid ``\.`` escape sequence.
    reg = re.compile(rb'src="(http[^"]*\.jpg)"')
    imglist = reg.findall(html)
    # Start from an empty output directory on every run.
    try:
        shutil.rmtree('./photoes')
    except FileNotFoundError:
        pass
    os.mkdir('photoes')
    for x, iurl in enumerate(imglist, start=1):
        # Matches are bytes because the page is not decoded; urlretrieve
        # needs a str URL.
        urllib.request.urlretrieve(iurl.decode(), './photoes/%s.jpg' % x, reporthook=getreport)
# Fetch the gallery page, then download every image it references.
start_url = 'http://image.baidu.com/activity/4110925744?id=53&mode=viewer'
page_html = getHtml(start_url)
getImg(page_html)
这个程序有个小问题:进度可能会大于 100%。原因是 urlretrieve 的回调按“已传输块数 × 块大小”来估算已下载量,而最后一块通常不足一个完整的块,所以乘积会略大于文件总大小。