import os
import re
from urllib.parse import urljoin

import requests
# Wallpaper downloader for one netbian.com listing page.
#
# Flow: fetch the listing page -> extract (detail link, title) pairs ->
# fetch each detail page -> extract the image URL -> save the image bytes to
# OUTPUT_DIR/<title>.jpg.

# Listing page to crawl; also the base against which page links are resolved.
url = 'http://www.netbian.com/index_2.htm'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
}

# Directory the images are written to (created on demand).
OUTPUT_DIR = 'img'
# Seconds to wait for a server before giving up; requests has no default
# timeout, so without this a stalled connection blocks forever.
REQUEST_TIMEOUT = 15

# Listing-page entry: captures the detail-page href and the image alt text.
LISTING_PATTERN = re.compile(
    r'<a href="(.*?)" title=".*?" target="_blank"><img src=".*?" alt="(.*?)" />.*?</a>')
# Detail-page image: captures the image src.
IMAGE_PATTERN = re.compile(
    r'target="_blank"><img src="(.*?)" alt=".*?" title=".*?">')
# Characters that are path separators or rejected in file names on common
# file systems, plus ASCII control characters.
INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def parse_listing(html):
    """Return the (link, title) pairs found in a listing page's HTML."""
    return LISTING_PATTERN.findall(html)


def parse_image_url(html):
    """Return the first image URL in a detail page, or None if none matches."""
    match = IMAGE_PATTERN.search(html)
    return match.group(1) if match else None


def safe_filename(title):
    """Turn a scraped title into a usable file name.

    Separators and reserved characters become underscores so the title cannot
    escape OUTPUT_DIR or make ``open`` fail; an empty result falls back to
    'untitled'.
    """
    cleaned = INVALID_FILENAME_CHARS.sub('_', title).strip().rstrip('.')
    return cleaned or 'untitled'


def fetch(target_url):
    """GET ``target_url`` with the shared headers and timeout.

    Raises requests.HTTPError on a 4xx/5xx status so error pages are never
    parsed or stored as images.
    """
    response = requests.get(
        url=target_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def main():
    """Download every image linked from the listing page at ``url``.

    A failure on the listing page propagates; a failure on a single image is
    reported and skipped so one bad entry does not abort the whole run.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    listing = fetch(url)
    listing.encoding = listing.apparent_encoding

    for link, title in parse_listing(listing.text):
        # urljoin copes with root-relative, relative and absolute hrefs.
        link_url = urljoin(url, link)
        try:
            html_data = fetch(link_url)
            html_data.encoding = html_data.apparent_encoding
            img_url = parse_image_url(html_data.text)
            if img_url is None:
                print(f'skipped {link_url}: no image found')
                continue
            img_content = fetch(urljoin(link_url, img_url)).content
        except requests.RequestException as exc:
            print(f'skipped {link_url}: {exc}')
            continue

        target = os.path.join(OUTPUT_DIR, safe_filename(title) + '.jpg')
        with open(target, mode='wb') as f:
            f.write(img_content)
        print(f'saved {target}')


if __name__ == '__main__':
    main()
Python实操:网站图片爬虫自动下载
最新推荐文章于 2024-09-21 17:56:48 发布
本文介绍了如何使用Python的requests库和正则表达式从指定网页抓取图片链接,并将图片下载保存到本地。
摘要由CSDN通过智能技术生成