# 将图片详情页的地址解析出来,然后把图片保存到本地。
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# 请求网页
url = 'https://pic.netbian.com/'
# 请求头
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 '
'Safari/537.36'
}
# 文件保存路径
my_path = r'D:\wallpapers'
# 图片序号
i = 1
for index in range(1, 3):
# 第一页和其他页面的地址不同 除了第一页都需要拼接index.html
if index == 1:
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
result_set = soup.select('ul.clearfix a')
for p1 in result_set:
res = requests.get(url + p1['href'], headers=headers)
soup = BeautifulSoup(res.text, 'html.parser')
res_set = soup.select('a#img img')
for p2 in res_set:
path = my_path + '/{}.jpg'.format('wallpaper' + str(i))
# wb 二进制读写方式
with open(path, 'wb') as img:
ress = requests.get(url + p2['src'])
# 将图片请求的结果内容写到文件中
img.write(ress.content)
img.close()
i += 1
print(path + '下载成功!')
elif index > 1:
response = requests.get(url + 'index_' + str(index) + '.html', headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
result_set = soup.select('ul.clearfix a')
for p1 in result_set:
res = requests.get(url + p1['href'], headers=headers)
soup = BeautifulSoup(res.text, 'html.parser')
res_set = soup.select('a#img img')
for p2 in res_set:
path = my_path + '/{}.jpg'.format('wallpaper' + str(i))
# wb 二进制读写方式
with open(path, 'wb') as img:
ress = requests.get(url + p2['src'])
# 将图片请求的结果内容写到文件中
img.write(ress.content)
img.close()
i += 1
print(path + '下载成功!')