过程:
1.首先获取需要下载图片的src属性
利用 BeautifulSoup 先获取 img 标签,再通过标签的 attrs 属性取得 src 地址。注意有些 src 是不完整的相对地址,后面使用时要在前面补上网站的域名。
import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def getPicUrls(url):
    """Fetch a page and return the <img> tags inside its article container.

    Args:
        url: Address of the article page to scan.

    Returns:
        The ``<img>`` tags found inside ``<div class="wenzhangcontent">``
        (a BeautifulSoup result list). An empty list is returned when the
        request fails or the container is missing, so callers can always
        iterate over the result.
    """
    try:
        # A timeout keeps an unresponsive server from hanging the script.
        r = requests.get(url, timeout=15)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        container = soup.find('div', {'class': 'wenzhangcontent'})
        if container is None:
            # find() returns None when nothing matches; report it explicitly
            # instead of relying on an AttributeError further down.
            print('未找到class为wenzhangcontent的div:{:s}'.format(url))
            return []
        return container.find_all('img')
    except Exception as e:
        # Best-effort boundary of the scraper: report the failure and hand
        # back "nothing to download" rather than an implicit None.
        print(e)
        return []
2.用 open() 以二进制写模式('wb')打开本地文件,再用 write() 写入图片内容,完成下载
# Step 2: download every image found on the page into a local directory.
localPath = 'd:/py_pics/'
# makedirs(exist_ok=True) also creates missing parent directories and avoids
# the check-then-create race of os.path.exists() followed by os.mkdir().
os.makedirs(localPath, exist_ok=True)

domain = 'http://www.lyjyfw.net/'
# getPicUrls may return None when the page could not be fetched or parsed;
# fall back to an empty list so the loop below is simply skipped.
picUrls = getPicUrls('http://www.lyjyfw.net/Html/News/201844/tR0454108.html') or []

for i, item in enumerate(picUrls):
    src = item.attrs.get('src')
    if not src:
        # An <img> without src has nothing to download. Handling it here
        # keeps the failure report from raising a second KeyError.
        print('下载第{:d}张图片失败:{:s}'.format(i + 1, '缺少src属性'))
        continue

    # urljoin completes relative paths against the site root and leaves
    # already-absolute URLs untouched; plain concatenation would corrupt them.
    picUrl = urljoin(domain, src)
    try:
        pic = requests.get(picUrl, timeout=15)
        # Do not save an HTTP error page as if it were an image.
        pic.raise_for_status()
        with open(os.path.join(localPath, '{}.jpg'.format(i)), 'wb') as f:
            f.write(pic.content)  # .content holds the raw response bytes
        print('成功下载第{:d}张图片:{:s}'.format(i + 1, picUrl))
    except Exception as e:
        # Best-effort: one failed image must not stop the remaining downloads.
        print('下载第{:d}张图片失败:{:s}'.format(i + 1, picUrl))
        print(e)