爬取网站图片(1.正则表达式)
如果文件夹不存在,就创建文件夹。
import os  # imported here because this snippet runs before the file's main import block

# Folder that receives the downloaded images; create it if it does not exist yet.
path = "E:\\images\\"
# exist_ok=True makes this a no-op when the folder is already there, so no
# separate existence check (and no check-then-create race) is needed.
os.makedirs(path, exist_ok=True)
import re
import requests
import os
import urllib.request
# Request headers: send a desktop-browser User-Agent with every request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
url = 'https://www.vmgirls.com/13344.html'
res = requests.get(url, headers=headers)  # fetch the page

# Collect the data-src attribute value of every matching <img ...> tag.
images = re.findall('<img alt=".*?" src=".*?" data-src="(.*?)" .*?>', res.content.decode('utf-8'))

count = 0  # remains 0 when the page yields no image links
os.makedirs('E:\\images\\', exist_ok=True)  # make sure the output folder exists
for count, image in enumerate(images, start=1):
    # Files are named <running number> + <last five characters of the link>.
    file_name = str(count) + image[-5:]
    # Request the image before opening the file, so a failed request does not
    # leave an empty file (or an unclosed handle) behind.
    data = requests.get(image, headers=headers)
    if not data.ok:
        # Do not save an HTTP error page as if it were an image.
        print("skipped " + file_name + " (HTTP " + str(data.status_code) + ")")
        continue
    with open("E:\\images\\" + file_name, "wb") as fobj:
        fobj.write(data.content)  # write the raw image bytes
    print("dowmloaded " + file_name)