准备工作:利用selenium自动刷新网页
python代码:
'''
Author: lu
Date: 2022-09-19 14:27:31
LastEditors: lu
LastEditTime: 2022-09-21 17:13:16
FilePath: \study\savePic.py
Description: 爬图片
'''
#python savePic.py
# Note: os is part of the Python standard library and does not need to be installed with pip.
import os
from urllib.parse import urljoin

import bs4
import requests
from bs4 import BeautifulSoup #pip install BeautifulSoup4
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.select import Select
# Script: render a page in Chrome via selenium, collect every <img> tag from
# the rendered HTML, and download each image into the ./phhz folder.

# Path to the local chromedriver binary.
webdrive = Chrome(executable_path=r"C:\Users\18851\AppData\Local\Programs\Python\Python310\Scripts\chromedriver.exe")
url = "https://www.csdn.net/"  # Site to scrape.
path = r'./phhz'  # Output folder, relative to the current working directory.

try:
    webdrive.get(url)  # Load the page in the browser.
    text = webdrive.page_source  # HTML of the page as rendered by the browser.
    tree = BeautifulSoup(text, "html.parser")
    imgs = tree.find_all('img')

    print(os.path.exists(path))
    # Create the output folder if it is missing; no error if it already exists.
    os.makedirs(path, exist_ok=True)

    # enumerate() starts at 0 so the first <img> is no longer skipped; the
    # remaining images keep the same file numbers as before.
    for i, tag in enumerate(imgs):
        print('imgs[i] ', tag)  # The tag itself.
        print('imgs[i] ', type(tag))  # A BeautifulSoup Tag object.

        # Prefer the lazy-loading attribute, fall back to the regular one.
        # Tag.get() returns None instead of raising when the attribute is absent.
        src = tag.get('data-src') or tag.get('src')
        if not src or src.startswith('data:'):
            # Nothing to download: no source at all, or an inline data URI.
            continue

        # Resolve relative and protocol-relative ("//host/x.png") sources
        # against the page URL so requests receives an absolute URL.
        img_url = urljoin(url, src)
        try:
            img = requests.get(img_url, timeout=10)
            img.raise_for_status()
        except requests.RequestException as err:
            # One broken image must not abort the whole run.
            print('skip', img_url, err)
            continue

        filename = 'phhz' + str(i) + '.jpg'
        with open(os.path.join(path, filename), 'wb') as f:
            f.write(img.content)
finally:
    # Always shut the browser down, even if scraping failed part-way.
    webdrive.quit()
结果