from selenium import webdriver
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
import time
import os
import requests
# Pages to scrape. Each entry is a mapping whose 'http' key holds the page URL.
urlList = []

# Suffix stripped from the <img> src before downloading.
# NOTE(review): presumably a CDN resize/format directive - confirm against the site.
LARGE_WEBP_SUFFIX = "!large.webp"

# Directory the downloaded images are written to (created on demand).
OUTPUT_DIR = "test_img"

# Seconds to wait for the page's dynamic content to render; adjust as needed.
PAGE_LOAD_WAIT_SECONDS = 5

# Upper bound, in seconds, on how long the image request may block.
DOWNLOAD_TIMEOUT_SECONDS = 30


def original_image_url(page_url, src):
    """Return the absolute image URL with the ``!large.webp`` suffix removed.

    ``src`` is resolved against ``page_url`` so relative and protocol-relative
    values also work; an already absolute ``src`` is returned unchanged apart
    from the suffix. A ``src`` without the suffix is used as-is instead of
    leaving the download URL undefined.
    """
    absolute = urljoin(page_url, src)
    if absolute.endswith(LARGE_WEBP_SUFFIX):
        return absolute[:-len(LARGE_WEBP_SUFFIX)]
    return absolute


def image_save_path(page_url, output_dir=OUTPUT_DIR):
    """Return ``<output_dir>/<last path segment of page_url>.jpg``.

    A trailing slash on the page path is ignored, and ``index`` is used when
    the URL has no path segment at all, so the file name is never just ".jpg".
    """
    page_path = urlparse(page_url).path.rstrip("/")
    stem = os.path.basename(page_path) or "index"
    return os.path.join(output_dir, stem + ".jpg")


def download_tee_image(page_url):
    """Render ``page_url`` in Chrome and save its first ``tee-image`` picture.

    Returns True when an image was written to disk, and False when the page
    has no matching ``<img>``, the element has no ``src``, or the image
    request does not answer with HTTP 200.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(page_url)
        # Fixed wait so scripts on the page can insert the image element.
        time.sleep(PAGE_LOAD_WAIT_SECONDS)
        html_source = driver.page_source
    finally:
        # Always release the browser, even when loading the page fails.
        driver.quit()

    soup = BeautifulSoup(html_source, 'html.parser')
    # Only <img> tags whose class attribute is exactly "tee-image" qualify.
    tee_images = soup.find_all(
        lambda tag: tag.name == 'img' and tag.get('class') == ['tee-image']
    )
    if not tee_images:
        return False

    src = tee_images[0].get('src')
    if not src:
        return False

    img_url = original_image_url(page_url, src)
    with requests.get(
        img_url, stream=True, timeout=DOWNLOAD_TIMEOUT_SECONDS
    ) as response:
        if response.status_code != 200:
            return False
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        # The file is named after the page URL, not the image URL.
        with open(image_save_path(page_url), 'wb') as f:
            for chunk in response.iter_content(1024):
                f.write(chunk)
    return True


for url in urlList:
    uri = url.get('http')
    if not uri:
        # Entry carries no page address; nothing to visit.
        continue
    download_tee_image(uri)
Python 爬取网页动态元素里的图片
于 2024-05-30 15:46:38 首次发布