import requests
from bs4 import BeautifulSoup
# Send a real-browser User-Agent so the site does not reject the request.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36'}

# Fetch page 1 only to discover the total number of pages.
url = "htxxxxxxxxxxxxxxxxxxxxx/page/1"  # NOTE(review): placeholder URL — fill in the real site
res = requests.get(url, headers=headers, timeout=30)
res.raise_for_status()  # fail fast on HTTP errors instead of parsing an error page
soup = BeautifulSoup(res.text, 'html.parser')

# All <li class="number"> elements are the pagination links; the last one
# carries the total page count.  (The original comment said 'page-numbers',
# but the code has always searched for class "number".)
page = soup.find_all('li', class_="number")
max_page = page[-1].text  # total page count as text, e.g. "12"

same_url = "htxxxxxxxxxxxxxxx/page/"  # NOTE(review): placeholder base URL
for i in range(1, int(max_page) + 1):
    page_url = same_url + str(i)  # URL of page i
    page_res = requests.get(page_url, headers=headers, timeout=30)
    page_soup = BeautifulSoup(page_res.text, 'html.parser')
    # Each image card is the smallest enclosing <div> for one item.
    get_all_a = page_soup.find_all('div', class_='el-card item m-t is-hover-shadow')
    for aa in get_all_a:
        # Guard against cards missing a title or image instead of crashing
        # with AttributeError on .text / ['src'].
        name_tag = aa.find('h2', class_='m-b-sm')
        if name_tag is None:
            continue
        name_1 = name_tag.text  # image title, used as the file name
        img_tag = aa.find('img')  # renamed from 'url' — the original shadowed the outer url variable
        if img_tag is None or 'src' not in img_tag.attrs:
            continue
        img_res = requests.get(img_tag['src'], headers=headers, timeout=30)  # download the image
        # BUG FIX: the original r'E:\adcd\ ' (raw string cannot end in a
        # backslash) put a stray space into every file name; build the path
        # with an escaped backslash instead.
        with open('E:\\adcd\\' + name_1 + '.jpg', 'wb') as f:
            f.write(img_res.content)
# BeautifulSoup — scrape every image across all pages of a paginated site.
# First published 2023-03-28 00:06:32.