# 将图片详情页的地址解析出来,然后把图片保存到本地。
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# 请求网页
url = 'https://pic.netbian.com/'
# 请求头
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 '
'Safari/537.36'
}
# 文件保存路径
my_path = r'D:\wallpapers'
# 图片序号
i = 1
for index in range(1, 3):
# 第一页和其他页面的地址不同 除了第一页都需要拼接index.html
if index == 1:
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
result_set = soup.select('ul.clearfix a')
for p1 in result_set:
res = requests.get(url + p1['href'], headers=headers)
soup = BeautifulSoup(res.text, 'html.parser')
res_set = soup.select('a#img img')
for p2 in res_set:
path = my_path + '/{}.jpg'.format('wallpaper' + str(i))
# wb 二进制读写方式
with open(path, 'wb') as img:
ress = requests.get(url + p2['src'])
# 将图片请求的结果内容写到文件中
img.write(ress.content)
img.close()
i += 1
print(path + '下载成功!')
elif index > 1:
response = requests.get(url + 'index_' + str(index) + '.html', headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
result_set = soup.select('ul.clearfix a')
for p1 in result_set:
res = requests.get(url + p1['href'], headers=headers)
soup = BeautifulSoup(res.text, 'html.parser')
res_set = soup.select('a#img img')
for p2 in res_set:
path = my_path + '/{}.jpg'.format('wallpaper' + str(i))
# wb 二进制读写方式
with open(path, 'wb') as img:
ress = requests.get(url + p2['src'])
# 将图片请求的结果内容写到文件中
img.write(ress.content)
img.close()
i += 1
print(path + '下载成功!')