import urllib.request
import os
import urllib
import bs4
import time
def get_html(url):
    """Fetch *url* over HTTP and return the parsed BeautifulSoup document.

    Args:
        url: Absolute URL (must include a scheme, e.g. ``http://``).

    Returns:
        A ``bs4.BeautifulSoup`` tree parsed with the built-in ``html.parser``.
    """
    # Use a context manager so the HTTP response is always closed,
    # even if .read() raises — the original leaked the connection.
    with urllib.request.urlopen(url) as response:
        content = response.read()
    return bs4.BeautifulSoup(content, "html.parser")
def get_img(soup, url):
    """Download every image referenced by ``<a data-original=...>`` tags in *soup*.

    Saves files into a fixed local directory, naming each one after the tag's
    ``title`` attribute (or a timestamp when the title is missing), plus a
    running counter and the URL's file suffix. Prints one line per download
    and a final count.

    Args:
        soup: Parsed ``bs4.BeautifulSoup`` document to scan.
        url:  Base URL of the page, used to resolve relative image paths.
    """
    local_dir = 'E:\\pythonDownload\\image'  # destination directory for images
    if not os.path.isdir(local_dir):
        os.makedirs(local_dir)  # create the directory on first use

    img_tag = 'a'                # tag that carries the image info on this site
    attr_src = 'data-original'   # attribute holding the image URL
    attr_name = 'title'          # attribute holding the image's display name

    # BUG FIX: the original built three parallel lists (src / name / href) but
    # appended to `name` for every tag while appending to `src` only for tags
    # that had `data-original` — so names and URLs drifted out of alignment.
    # Processing each tag as a unit keeps name and URL paired correctly.
    num = 0
    for dom in soup.findAll(img_tag):
        if not dom.has_attr(attr_src):
            continue  # tag carries no image — skip it entirely
        img_url = urllib.parse.urljoin(url, dom[attr_src])  # relative -> absolute

        if dom.has_attr(attr_name):
            name = dom[attr_name]
        else:
            # No title: fall back to a timestamp-based name.
            name = time.strftime("%Y%m%d%H%M%S", time.localtime())

        # Strip characters that are illegal in Windows filenames so that
        # urlretrieve cannot fail on a title such as 'a/b: c?'.
        safe_name = "".join(c for c in name if c not in '\\/:*?"<>|').strip()
        if not safe_name:
            safe_name = 'image'

        file_suffix = img_url.split(".")[-1]  # crude suffix from the URL
        filename = local_dir + os.sep + safe_name + '_' + str(num) + '.' + file_suffix
        print(filename + " " + img_url)  # progress line: target path + source URL
        urllib.request.urlretrieve(img_url, filename)  # download the image
        num += 1

    print('下载图片:' + str(num) + '张')
# Script entry: fetch the page, then scan it for downloadable images.
# BUG FIX: the original used "www.baidu.com" with no scheme;
# urllib.request.urlopen raises ValueError ("unknown url type") on
# scheme-less URLs, so the script could never run.
path = "http://www.baidu.com"  # page whose images should be downloaded
html = get_html(path)          # fetch and parse the page
get_img(html, path)            # find image links and download them
print('done')