import requests
from bs4 import BeautifulSoup
import urllib
import os
# Celebrity id (kept as a string) used to build the Douban photo-listing URLs.
address = "1274477"
def getHtml(index, number):
    """Fetch one page of a celebrity's Douban photo listing and parse it.

    Args:
        index: Value for the ``start`` query parameter of the listing URL.
        number: Celebrity id inserted into the URL path (str or int).

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``html.parser`` backend.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    url = (
        "https://movie.douban.com/celebrity/" + str(number)
        + "/photos/?type=C&start=" + str(index)
    )
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11"
    }
    # The second positional argument of requests.get is ``params``; the
    # User-Agent must be passed through ``headers=`` to be sent as a header.
    # A timeout keeps a stalled connection from blocking forever.
    r = requests.get(url, headers=headers, timeout=30)
    return BeautifulSoup(r.text, "html.parser")
# Module-level accumulator for the <img> tags gathered by getImages.
images = list()

# Number of <img> tags in the poster list of the first listing page;
# getImages multiplies it by the page index to obtain each page's start offset.
imgLen = len(
    getHtml(0, address)
    .find('ul', attrs={'class': "poster-col3 clearfix"})
    .find_all('img')
)
def getImages(pageNum, name, number):
    """Download a celebrity's listed photos into the directory ``name``.

    Fetches ``pageNum`` listing pages through ``getHtml``, collects the
    ``<img>`` tags inside each page's ``ul`` whose class attribute is
    ``"poster-col3 clearfix"``, and saves every image as ``1.<ext>``,
    ``2.<ext>``, ... inside ``name``. The tags found are also appended to the
    module-level ``images`` list. The process working directory is left
    unchanged.

    Args:
        pageNum: Number of listing pages to fetch.
        name: Target directory; created if missing, reused if it exists.
        number: Celebrity id forwarded to ``getHtml``.

    Returns:
        None. A failed download is reported on stdout and skipped.
    """
    # Function-scope imports: a bare ``import urllib`` does not guarantee
    # that the ``request`` / ``parse`` submodules are loaded.
    from urllib.parse import urlsplit
    from urllib.request import urlretrieve

    # Reuse an existing directory instead of failing on it.
    os.makedirs(name, exist_ok=True)

    collected = []
    for page in range(pageNum):
        soup = getHtml(page * imgLen, number)
        poster_list = soup.find('ul', attrs={'class': "poster-col3 clearfix"})
        if poster_list is None:
            # No poster list on this page; presumably past the last page or
            # the request was rejected, so stop paging.
            break
        found = poster_list.find_all('img')
        collected.extend(found)
        images.extend(found)  # keep the module-level accumulator in sync

    for position, img in enumerate(collected, start=1):
        src = img.get('src')
        try:
            # Take the extension from the URL path so ".jpeg"/".webp" and
            # URLs carrying a query string are handled; default to ".jpg"
            # when the path has no extension.
            suffix = os.path.splitext(urlsplit(src).path)[1] or '.jpg'
            urlretrieve(src, os.path.join(name, str(position) + suffix))
        except Exception as exc:
            # Best effort: report the failure and continue with the rest.
            print('存储有异常', src, exc)
# Script entry: download the first listing page of photos into ./huge.
getImages(pageNum=1, name='huge', number=address)