Python3 抓取网页中的图片

最新推荐文章于 2021-02-03 06:56:38 发布

damotiansheng

最新推荐文章于 2021-02-03 06:56:38 发布

阅读量5k

点赞数

分类专栏： Python

Python 专栏收录该内容

27 篇文章 1 订阅

订阅专栏

import urllib.request
import socket
import re
import sys
import os
# Directory that downloaded images are saved into.
targetDir = r"C:\Users\elqstux\Desktop\pic"


def destFile(path):
    """Return the local file path under ``targetDir`` for an image URL.

    The file name is whatever follows the last '/' in ``path``; if ``path``
    contains no '/', the whole string is used as the file name.

    As a side effect, ``targetDir`` is created if it does not exist yet.

    Args:
        path: URL (or any '/'-separated string) of the image.

    Returns:
        ``targetDir`` joined with the file name taken from ``path``.
    """
    # makedirs also creates missing parent directories, and exist_ok avoids
    # the check-then-create race of a separate isdir() test before mkdir().
    os.makedirs(targetDir, exist_ok=True)
    # rsplit never raises, unlike rindex('/') which fails on slash-less input.
    filename = path.rsplit('/', 1)[-1]
    return os.path.join(targetDir, filename)

if __name__ == "__main__":
    # Download every jpg/png/gif whose http: URL appears in the page source.
    hostname = "http://www.douban.com"
    req = urllib.request.Request(hostname)
    # The with-block closes the HTTP response even if read() raises.
    with urllib.request.urlopen(req) as webpage:
        contentBytes = webpage.read()
    # Decode to real text. str(bytes) would produce the "b'...'" repr, in which
    # newlines and tabs become literal backslash escapes that [^\s] no longer
    # stops at, so a match could run across line boundaries.
    content = contentBytes.decode("utf-8", errors="replace")
    # findall returns (full_url, extension) tuples because the pattern has two
    # groups; set() removes duplicates so each image is fetched only once.
    for link, _ext in set(re.findall(r'(http:[^\s]*?(jpg|png|gif))', content)):
        print(link)
        try:
            urllib.request.urlretrieve(link, destFile(link))
        except OSError as err:
            # urllib.error.URLError is an OSError subclass; report the failure
            # and keep going so one bad link does not abort the other downloads.
            print("failed to download %s: %s" % (link, err), file=sys.stderr)

import urllib.request
import socket
import re
import sys
import os
# Directory that downloaded images are saved into.
targetDir = r"H:\pic"


def destFile(path):
    """Map an image URL to a file path inside ``targetDir``.

    Only the part of ``path`` after its last '/' is kept as the file name;
    a ``path`` without any '/' is used unchanged as the file name.

    As a side effect, ``targetDir`` is created if it does not exist yet.

    Args:
        path: URL (or any '/'-separated string) of the image.

    Returns:
        ``targetDir`` joined with the file name taken from ``path``.
    """
    # exist_ok makes this safe to call repeatedly and removes the race between
    # a separate isdir() check and mkdir(); missing parents are created too.
    os.makedirs(targetDir, exist_ok=True)
    # rpartition yields the whole string as the tail when there is no '/',
    # whereas rindex('/') would raise ValueError.
    filename = path.rpartition('/')[2]
    return os.path.join(targetDir, filename)

if __name__ == "__main__":
    # Print every jpg/png/gif http: URL found in the page, with its extension.
    hostname = "http://www.douban.com/"
    req = urllib.request.Request(hostname)
    # The with-block closes the HTTP response even if read() raises.
    with urllib.request.urlopen(req) as webpage:
        contentBytes = webpage.read()
    # Decode to real text. str(bytes) would produce the "b'...'" repr, in which
    # newlines and tabs become literal backslash escapes that [^\s] no longer
    # stops at, so a match could run across line boundaries.
    content = contentBytes.decode("utf-8", errors="replace")
    # The pattern contains two nested pairs of parentheses, i.e. two capture
    # groups, so findall returns a list of 2-tuples. Only the text matched
    # inside the parentheses appears in the list: (full URL, file extension).
    match = re.findall(r'(http:[^\s]*?(jpg|png|gif))', content)
    for picname, picType in match:
        print(picname)
        print(picType)
      

'''
输出：
http://img3.douban.com/pics/blank.gif
gif
http://img3.douban.com/icon/g111328-1.jpg
jpg
http://img3.douban.com/pics/blank.gif
gif
http://img3.douban.com/icon/g197523-19.jpg
jpg
http://img3.douban.com/pics/blank.gif
gif
...
'''

转载来源： http://blog.csdn.net/wangyangkobe/article/details/8712121