本文总结了使用 Python 脚本在堆糖网(duitang.com)按关键词搜索并批量下载图片的方法。
代码如下:
#-*- coding: utf-8 -*-
import requests
import re
from bs4 import BeautifulSoup
import time
import os.path
import sys
import urllib
# Number of results requested per search page (the `limit` query parameter);
# also used to number downloaded files consecutively across pages.
PAGE_SIZE = 24
# Seconds to wait for an HTTP response before giving up on a request.
REQUEST_TIMEOUT = 30
SEARCH_URL = 'https://www.duitang.com/search/?kw='
BLOG_URL = 'https://www.duitang.com/blog/?id='

try:  # Python 2 keeps both helpers directly in urllib
    from urllib import quote, urlretrieve
except ImportError:  # Python 3 moved them into submodules
    from urllib.parse import quote
    from urllib.request import urlretrieve


def encode_keyword(keyword, input_encoding=None):
    """Return *keyword* percent-encoded as UTF-8 for use in a query string.

    keyword may be a byte string (what Python 2's ``raw_input`` returns) or
    a text string.  Byte strings are decoded with *input_encoding*; when that
    is not given, stdin's encoding is used, and UTF-8 if stdin reports none
    (for example when input is piped).
    """
    if isinstance(keyword, bytes):
        encoding = (input_encoding
                    or getattr(sys.stdin, 'encoding', None)
                    or 'utf-8')
        keyword = keyword.decode(encoding)
    return quote(keyword.encode('utf-8'))


def build_search_url(quoted_keyword, page):
    """Return the search URL for the 1-based result *page*.

    Page 1 uses the plain search URL; later pages add ``start``/``limit``
    so that page ``p`` begins at result ``PAGE_SIZE * (p - 1)``.
    """
    url = SEARCH_URL + quoted_keyword + '&type=feed'
    if page > 1:
        url += ('&start=' + str(PAGE_SIZE * (page - 1))
                + '&limit=' + str(PAGE_SIZE))
    return url


def build_image_path(download_dir, keyword, index):
    """Return the file path for the *index*-th image of a search.

    The file is named ``<keyword>NO <index>.jpg``.  A ``/`` in the keyword
    is replaced with ``_`` so it cannot be read as a directory separator.
    """
    safe_keyword = keyword.replace('/', '_')
    return os.path.join(download_dir,
                        safe_keyword + 'NO ' + str(index) + '.jpg')


def format_elapsed(seconds):
    """Return the elapsed-time summary line for *seconds* (whole seconds)."""
    minutes, secs = divmod(int(seconds), 60)
    return '共用时' + str(minutes) + 'min' + str(secs) + 's'


def fetch_soup(url):
    """Download *url* and return its body parsed with ``html.parser``."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(response.content, 'html.parser')


def read_total_pages(soup):
    """Return the page count shown in the ``G-totalpagenum`` span.

    Falls back to 1 when the span is missing or not numeric, so that the
    already-fetched first page is still processed instead of crashing.
    NOTE(review): assumes a missing span means a single page of results;
    confirm against the live site.
    """
    node = soup.find('span', attrs={'id': 'G-totalpagenum'})
    if node is None:
        return 1
    try:
        return int(node.get_text())
    except ValueError:
        return 1


def download_image(item, image_path):
    """Save the picture of one search-result tile to *image_path*.

    The full-size image linked from the tile's detail page is tried first;
    if anything in that chain fails, the thumbnail ``src`` is downloaded
    instead.  Returns True when a file was saved, False when both attempts
    failed.
    """
    thumbnail = item.find('img')
    if thumbnail is None:
        return False
    try:
        detail = fetch_soup(BLOG_URL + str(thumbnail['data-rootid']))
        full_size = detail.find('a', attrs={'class': 'img-out'})['href']
        urlretrieve(full_size, image_path)
        return True
    except Exception:
        # Deliberate best-effort: a missing attribute, an unexpected page
        # layout or a network error all lead to the thumbnail fallback.
        # `Exception` (not a bare except) lets Ctrl-C still stop the run.
        pass
    try:
        urlretrieve(thumbnail['src'], image_path)
        return True
    except Exception:
        return False


def main():
    """Prompt for a keyword and download every matching image.

    Images are written to a ``Download`` directory next to the script
    (created if absent) and numbered consecutively across result pages.
    """
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    keyword = read_line('请输入需要搜索的内容:\n')
    # Start timing only after the user has answered the prompt.
    started = time.time()
    quoted_keyword = encode_keyword(keyword)

    download_dir = os.path.join(sys.path[0], 'Download')
    if not os.path.isdir(download_dir):
        os.makedirs(download_dir)

    # The first page doubles as the source of the total page count, so it
    # is fetched once here and reused by the first loop iteration.
    soup = fetch_soup(build_search_url(quoted_keyword, 1))
    total_pages = read_total_pages(soup)

    downloaded = 0
    for page in range(1, total_pages + 1):
        print('正在下载第' + str(page) + '页,共' + str(total_pages) + '页...')
        if page > 1:
            soup = fetch_soup(build_search_url(quoted_keyword, page))
        position = 1
        for item in soup.find_all('div', attrs={'class': 'woo'}):
            index = position + PAGE_SIZE * (page - 1)
            image_path = build_image_path(download_dir, keyword, index)
            if download_image(item, image_path):
                position += 1
                downloaded += 1
            else:
                print('第' + str(index) + '张图片下载失败,已跳过')

    print('共计下载' + str(downloaded) + '张图片')
    print(format_elapsed(time.time() - started))


if __name__ == "__main__":
    main()
代码为原创,仅供学习交流使用,未经许可请勿转载。