"""Download the images listed on a Huajiao category page.

The script fetches ``URL``, collects every ``<img class="img">`` element and
every ``<p class="name fl">`` element, downloads each image and stores it as a
PNG inside ``FOLDER_PATH``, using the matching name text as the file name.
"""
import os
import re
import time
from io import BytesIO
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from PIL import Image

# Page to scrape; change this to the page you want to crawl.
URL = "http://www.huajiao.com/category/1000"
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"}

# Directory the images are written to. The original script created this
# directory but then saved into a hard-coded 'E:\Python\photo/' path, which
# only worked on the author's machine.
FOLDER_PATH = './photo'

# Seconds to wait between two image downloads (custom throttle).
DOWNLOAD_DELAY = 1

# Seconds before an HTTP request is abandoned instead of hanging forever.
REQUEST_TIMEOUT = 10

# The image at position i is named after the name at position i + NAME_OFFSET.
# NOTE(review): the offset of 1 is preserved from the original script; it
# presumably skips a leading name element that has no image. Set it to 0 if
# names and images turn out to be aligned one-to-one -- confirm against the page.
NAME_OFFSET = 1

# Characters that are not allowed in file names on Windows and/or POSIX.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def _image_file_name(names, index):
    """Return the PNG file name for the image at position *index*.

    Falls back to a numbered name when no name exists for the image (the name
    list is shorter than the image list) or the name is empty after cleaning.
    """
    position = index + NAME_OFFSET
    raw_name = str(names[position]).strip() if position < len(names) else ''
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', raw_name)
    return (cleaned or 'image_%d' % (index + 1)) + '.png'


def _download_image(src, destination):
    """Fetch the image at *src* and save it to *destination*.

    Raises:
        requests.RequestException: the download failed or returned an HTTP
            error status.
        OSError: the payload is not a readable image or cannot be written.
    """
    response = requests.get(src, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    with Image.open(BytesIO(response.content)) as image:
        image.save(destination)


def main():
    """Scrape the category page and download every listed image."""
    page = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    items = soup.find_all('img', class_='img')  # image elements
    names = [tag.text for tag in soup.find_all('p', class_='name fl')]  # name elements

    os.makedirs(FOLDER_PATH, exist_ok=True)

    for index, item in enumerate(items):
        src = item.get('src')
        if not src:
            # An <img> without a src attribute has nothing to download.
            continue

        destination = os.path.join(FOLDER_PATH, _image_file_name(names, index))
        try:
            # urljoin resolves relative and protocol-relative ("//host/...")
            # sources against the page URL; absolute URLs pass through as-is.
            _download_image(urljoin(URL, src), destination)
        except (requests.RequestException, OSError) as exc:
            # One broken image must not abort the remaining downloads.
            print('第%d张图片下载失败: %s' % (index + 1, exc))
        else:
            print('第%d张图片下载完成' % (index + 1))
        time.sleep(DOWNLOAD_DELAY)

    print('抓取完成')


if __name__ == '__main__':
    main()
# 运行示意图 (sample run screenshot)