import json
import re

import requests
# Matches the line of inline JavaScript that assigns the page's pin list,
# e.g. `app.page["pins"] = [{...}, ...];` ('.' does not cross newlines).
_PINS_RE = re.compile(r'app\.page\["pins"\].*')

# Pagination endpoint used to fetch the pins that follow a given pin id.
# NOTE: this always continues on the "favorite/beauty" listing, whatever URL
# the crawl was started from (kept as in the original implementation).
_NEXT_PAGE_URL = ("http://huaban.com/favorite/beauty/?i5p998kw&max={max_id}"
                  "&limit=20&wfl=1")

# Seconds to wait for the server before giving up, so that a stalled
# connection cannot hang the crawl forever.
_REQUEST_TIMEOUT = 30


def _extract_pins(html):
    """Return the list of pin dicts embedded in a page, or [] if there is none.

    The pin list is parsed with ``json.loads`` rather than ``eval``: the text
    comes from a remote server, so evaluating it as Python would allow
    arbitrary code execution (and would also fail on JSON ``true``/``false``).

    Raises:
        ValueError: if the embedded pin list is not valid JSON.
    """
    match = _PINS_RE.search(html)
    if match is None:
        return []
    # Keep what follows the assignment's '=' and drop the trailing ';'.
    payload = match.group(0).partition('=')[2].strip().rstrip(';').strip()
    if not payload:
        return []
    pins = json.loads(payload)
    return pins if isinstance(pins, list) else []


def _image_info(pin):
    """Build the download descriptor (id, url, type) for a single pin dict."""
    mime = pin["file"]["type"]
    return {
        'id': str(pin['pin_id']),
        'url': "http://img.hb.aicdn.com/" + pin["file"]["key"] + "_fw658",
        # "image/<subtype>" -> "<subtype>"; anything else gets a placeholder.
        'type': mime[6:] if mime[:5] == 'image' else 'NoName',
    }


def _download_image(image):
    """Fetch one image and write it to ``<id>.<type>`` in the current directory."""
    response = requests.get(image["url"], timeout=_REQUEST_TIMEOUT)
    image_name = image["id"] + "." + image["type"]
    with open(image_name, 'wb') as fp:
        fp.write(response.content)


def huaban(url):
    """Download every image listed on a Huaban page and on the pages after it.

    Starting at *url*, the page is fetched, the pin list embedded in its
    ``app.page["pins"]`` script assignment is parsed, and each pin's image is
    saved to the current working directory as ``<pin_id>.<type>``.  The id of
    the last pin is then used to request the next batch, and the process
    repeats until a page yields no pins or the last pin id stops advancing.

    Pages are walked with a loop instead of recursion so that long listings
    cannot exhaust the interpreter's recursion limit.

    Args:
        url: URL of the first page to crawl.

    Returns:
        None.

    Raises:
        requests.RequestException: on network failure or timeout.
        ValueError: if a page's embedded pin list is not valid JSON.
        KeyError: if a pin lacks the ``pin_id`` or ``file`` fields.
    """
    previous_last_id = None
    while True:
        html = requests.get(url, timeout=_REQUEST_TIMEOUT).content.decode('utf-8')
        pins = _extract_pins(html)
        if not pins:
            # No pin data on this page: the listing is exhausted.
            return

        images = [_image_info(pin) for pin in pins]
        for image in images:
            _download_image(image)

        last_id = images[-1]['id']
        if last_id == previous_last_id:
            # The server returned the same batch again; stop instead of
            # looping forever.
            return
        previous_last_id = last_id
        url = _NEXT_PAGE_URL.format(max_id=last_id)
# Start the crawl at the "favorite/beauty" listing URL. There is no
# `if __name__ == "__main__":` guard, so this runs (network requests and file
# writes included) whenever the module is executed or imported.
huaban('http://huaban.com/favorite/beauty/')
# For more web crawler examples, see https://blog.csdn.net/weixin_39777626/article/details/81564819