import urllib.request
import os


def url_open(url):
    """Fetch *url* and return the raw response body as bytes.

    Sends a desktop-browser User-Agent header because the target site
    rejects the default urllib agent.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent',
                   'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36'
                   ' (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36')
    # Close the response even if read() raises (the original leaked the
    # connection by never closing it).
    with urllib.request.urlopen(req) as response:
        html = response.read()
    print('url_open')  # debug trace kept from the original
    return html


def page_htmls(url, count):
    """Scrape up to *count* article links from the listing page at *url*.

    Scans the raw HTML for anchors of the form
    ``a target="_blank" href="...html"`` and returns the extracted hrefs
    (each is also printed, as in the original, for debugging).
    """
    html = url_open(url).decode('utf-8')
    pages = []
    a = html.find('a target="_blank" href=')
    i = 0
    while a != -1:
        i += 1
        b = html.find('.html', a, a + 200)
        if b != -1:
            # a + 24 skips the marker plus the opening quote;
            # b + 5 keeps the '.html' suffix in the slice.
            pages.append(html[a + 24:b + 5])
        else:
            b = a + 24  # no '.html' nearby: resume scanning past the marker
        a = html.find('a target="_blank" href=', b)
        if i == count:
            break
    for each in pages:
        print(each)  # debug trace kept from the original
    return pages
def _scan_urls(html, marker, offset, window):
    """Yield the substring after each *marker* up to the next ``.jpg``.

    For every occurrence of *marker* in *html*, looks for ``.jpg`` within
    the next *window* characters. On a hit, yields the slice starting
    *offset* characters past the marker (skipping the marker text and its
    opening quote) through the end of ``.jpg``; on a miss, the candidate
    is skipped. Scanning then resumes, exactly as the two original
    hand-rolled loops did. Factored out because find_imgs/imgurl_get
    duplicated this loop verbatim.
    """
    a = html.find(marker)
    while a != -1:
        b = html.find('.jpg', a, a + window)
        if b != -1:
            yield html[a + offset:b + 4]  # b + 4 keeps the '.jpg' suffix
            resume = b
        else:
            resume = a + offset
        a = html.find(marker, resume)


def find_imgs(url):
    """Return absolute URLs of every ``img src="...jpg"`` on the page at *url*."""
    html = url_open(url).decode('utf-8')
    imgs = []
    # len('img src=') + 1 == 9 skips the marker and its opening quote.
    for src in _scan_urls(html, 'img src=', 9, 100):
        if src.find('http') == -1:
            # Protocol-relative source ('//host/...'): add a scheme.
            imgs.append('http:' + src)
        else:
            imgs.append(src)
    return imgs


def imgurl_get(url):
    """Return absolute URLs of the styled ``src="...jpg"`` images at *url*.

    These sources are always protocol-relative on the target site, so the
    ``http:`` scheme is always prepended (matching the original behavior).
    """
    html = url_open(url).decode('utf-8')
    # len('color: #555555;" src=') + 1 == 22 skips marker + opening quote.
    return ['http:' + src
            for src in _scan_urls(html, 'color: #555555;" src=', 22, 100)]
def save_imgs(folder, imgs):
    """Download every URL in *imgs* into the current working directory.

    File names are the last path component of each URL. (*folder* is
    unused — callers have already chdir'ed into it — but the parameter is
    kept for interface compatibility.)
    """
    for ea in imgs:
        filename = ea.split('/')[-1]
        # Fetch BEFORE opening the file: the original opened first, so a
        # failed download left an empty/truncated file behind.
        img = url_open(ea)
        with open(filename, 'wb') as f:
            f.write(img)


def download_mm(folder='H:\\xxoo2', page_count=100, count=100):
    """Crawl *count* listing pages and save each linked article's styled
    images into *folder*.

    :param folder: destination directory (created if missing).
    :param page_count: max article links to take from each listing page.
    :param count: number of listing pages ('/page/1' .. '/page/count').
    """
    main_url = 'http://www.zhangzishi.cc/category/welfare'
    main_urls = [main_url + '/page/' + str(i + 1) for i in range(count)]
    # exist_ok: the original os.mkdir raised FileExistsError on any rerun.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    for url in main_urls:
        for page in page_htmls(url, page_count):
            save_imgs(folder, imgurl_get(page))


def download__img(folder='H:\\xxoo', page_count=100):
    """Save every <img> from the articles linked on the first listing page.

    :param folder: destination directory (created if missing).
    :param page_count: max article links to take from the listing page.
    """
    main_url = 'http://www.zhangzishi.cc/category/welfare'
    # exist_ok: the original os.mkdir raised FileExistsError on any rerun.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    for page in page_htmls(main_url, page_count):
        save_imgs(folder, find_imgs(page))


if __name__ == '__main__':
    download_mm()
    # download__img()