# -*- coding: utf-8 -*-
import os
import requests
from bs4 import BeautifulSoup
# URL of the page to scrape.
index_url = 'https://1111.com/view/202302/65110.html'

# Request headers that present the scraper as a desktop Chrome browser.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/75.0.3770.100 Safari/537.36'
    ),
}
def download_img(img_url, save_dir='/tmp/88/'):
    """Download one image into ``save_dir`` unless it is already there.

    The file is named after the last ``/``-separated segment of the URL.
    A bad link or a failed request is reported with a printed message
    instead of an exception, so a batch of downloads keeps going.

    Args:
        img_url: Absolute URL of the image. ``None``, an empty string, or
            a URL ending in ``/`` is reported and skipped.
        save_dir: Directory to store the image in; created (including
            missing parents) when it does not exist.

    Returns:
        None.
    """
    if not img_url:
        print('无效的图片链接')
        return

    # The file name is the last path segment of the URL.
    name = img_url.split('/')[-1]
    if not name:
        # A URL ending in "/" leaves no file name to save under.
        print('无效的图片链接')
        return

    # makedirs creates missing parents and tolerates an existing directory,
    # which avoids the check-then-create race of exists() + mkdir().
    os.makedirs(save_dir, exist_ok=True)
    file_name = os.path.join(save_dir, name)
    if os.path.exists(file_name):
        print('文件已存在')
        return

    try:
        # Send the same browser headers as the page request and bound the
        # wait so one stalled image cannot hang the whole run.
        r = requests.get(img_url, headers=header, timeout=10)
        # Do not save an HTTP error page as if it were the image.
        r.raise_for_status()
    except requests.RequestException as exc:
        print('下载失败:', img_url, exc)
        return

    # The file is only opened after a successful response, so a failed
    # download never leaves an empty file that later looks "already saved".
    with open(file_name, 'wb') as f:
        f.write(r.content)
    print('文件保存成功')
def main():
    """Fetch the page at ``index_url`` and download every image it references.

    Prints a failure message and returns when the page cannot be fetched
    or the response status is not 200. Otherwise each distinct image URL
    found in an ``<img src=...>`` attribute is passed to ``download_img``.
    """
    # Local import: only this entry point needs it.
    from urllib.parse import urljoin

    try:
        response = requests.get(index_url, headers=header, timeout=10)
    except requests.RequestException:
        print('请求失败')
        return
    if response.status_code != 200:
        print('请求失败')
        return

    bs = BeautifulSoup(response.text, 'html.parser')

    # A set removes duplicates so each image is requested once.
    img_urls = set()
    for tag in bs.find_all('img'):
        src = tag.get('src')
        if not src:
            # <img> without a src attribute: nothing to download.
            continue
        # Resolve relative and protocol-relative links against the page URL.
        url = urljoin(index_url, src)
        # Skip inline data: URIs and other schemes that cannot be fetched
        # over HTTP.
        if url.startswith(('http://', 'https://')):
            img_urls.add(url)

    for url in img_urls:
        download_img(url)


if __name__ == '__main__':
    main()
# python3爬取图片
# 于 2023-02-27 22:10:59 首次发布