Python 图片爬虫（Python image crawler）

importurllib.requestimportosdefurl_open(url):

req=urllib.request.Request(url)

req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36')

response=urllib.request.urlopen(req)

html=response.read()print('url_open')returnhtmldefpage_htmls(url,count):

html= url_open(url).decode('utf-8')

pages=[]

a= html.find('a target="_blank" href=')

i=0while a != -1:

i+= 1b= html.find('.html',a,a+200)if b != -1:

pages.append(html[a+24:b+5])else:

b= a + 24a= html.find('a target="_blank" href=',b)if i ==count:break

for each inpages:print(each)returnpages'''

'''

deffind_imgs(url):

html= url_open(url).decode('utf-8')

imgs=[]

a= html.find('img src=')while a != -1:

b= html.find('.jpg',a,a+100)if b != -1:if html[a+9:b+4].find('http') == -1:

imgs.append('http:'+html[a+9:b+4])else:

imgs.append(html[a+9:b+4])else:

b= a + 9a= html.find('img src=',b)'''for each in imgs:

print(each)'''

returnimgsdefimgurl_get(url):

html= url_open(url).decode('utf-8')

imgurls=[]

a= html.find('color: #555555;" src=')while a != -1:

b= html.find('.jpg',a,a+100)if b != -1:

imgurls.append('http:'+html[a+22:b+4])else:

b= a + 22a= html.find('color: #555555;" src=',b)returnimgurls'''for each in imgurls:

print(each)'''

def save_imgs(folder, imgs):
    """Download every URL in *imgs* into the current working directory.

    The file name is the last path component of each URL.  *folder* is
    accepted but unused (kept for interface compatibility); callers
    chdir into the target folder first.
    """
    for ea in imgs:
        filename = ea.split('/')[-1]
        with open(filename, 'wb') as f:
            f.write(url_open(ea))


def download_mm(folder='H:\\xxoo2', page_count=100, count=100):
    """Crawl *count* listing pages and save every gallery image.

    Creates *folder*, chdirs into it, then for each listing page pulls
    up to *page_count* article URLs and downloads their images.
    """
    main_url = 'http://www.zhangzishi.cc/category/welfare'
    main_urls = [main_url + '/page/' + str(i + 1) for i in range(count)]
    # exist_ok: the original os.mkdir crashed when re-running into the
    # same folder.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    for url in main_urls:
        for page in page_htmls(url, page_count):
            save_imgs(folder, imgurl_get(page))


def download__img(folder='H:\\xxoo', page_count=100):
    """Crawl the first listing page only, using the img-src extractor."""
    main_url = 'http://www.zhangzishi.cc/category/welfare'
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    for page in page_htmls(main_url, page_count):
        save_imgs(folder, find_imgs(page))


if __name__ == '__main__':
    download_mm()
    # download__img()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值