一、思路说明
1.1 通过查看页面源码，获取图片链接
二、代码
import requests
from bs4 import BeautifulSoup
import os
import time
import re
# Directory where downloaded images are saved.
img_dir = r'C:\Users\JH\Desktop\爬虫\img'
User_Agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
# Build request headers: a browser User-Agent so the site does not reject
# the default python-requests UA.
headers = {
    'User-Agent': User_Agent
}
url = "https://umei.cc/bizhitupian/weimeibizhi/"
# Fetch the listing page (timeout added so a dead host can't hang forever).
rsp = requests.get(url, headers=headers, timeout=30)
rsp.encoding = 'utf-8'
# Parse the HTML document.
soup = BeautifulSoup(rsp.text, 'html.parser')
# Lazy-loaded <img class="lazy"> tags carry the real image URL in the
# 'data-original' attribute; the attrs filter guarantees it is present
# and matches http...jpg.
alist = soup.find_all(name='img', class_="lazy", attrs={'data-original': re.compile('^http.*jpg')})
# Build the image URL list, format: [[url1], [url2], [url3]].
# Read the URL straight from the 'data-original' attribute instead of
# re-matching str(tag) with a greedy 'http.*jpg' regex, which could
# over-capture across attributes.
url_lists = []
for img_info in alist:
    url_lists.append([img_info['data-original']])
# Ensure the image directory exists. BUG FIX: the original guarded the
# whole download loop with "if not exists: mkdir / else: download", so on
# the very first run the directory was created but nothing was downloaded.
os.makedirs(img_dir, exist_ok=True)
for url_list in url_lists:
    img_url = url_list[0]
    print(img_url)
    # Derive the local filename from the last URL path segment.
    filename = img_url.split('/')[-1]
    file_name_path = os.path.join(img_dir, filename)
    print(filename)
    # Actually stream the body to disk in chunks (the original passed
    # stream=True but then read .content, defeating streaming and never
    # closing the response).
    with requests.get(img_url, headers=headers, stream=True, timeout=30) as img_rsp:
        with open(file_name_path, 'wb') as fs:
            for chunk in img_rsp.iter_content(chunk_size=8192):
                fs.write(chunk)
    print("{}图片下载完成".format(filename))