图片爬取爬虫

沁一致

已于 2022-03-20 15:33:58 修改

阅读量831

点赞数

文章标签：开发语言

于 2022-03-20 15:32:45 首次发布

本文链接：https://blog.csdn.net/m0_59205095/article/details/123614189

版权

import os
import re
import urllib.request as ur
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
# Fetch one page and save every image it references into a local folder.
# For each <img> tag the script prints its 1-based index followed by one
# status line: saved / already exists / failed.

root = "D://tppq//"                 # destination folder for downloaded images
kv = {"user-agent": "Mozilla/5.0"}  # User-Agent header sent with each image request
cookie = {}                         # cookies sent with each image request (none)

# Read the HTML inside a context manager so the HTTP response is closed.
with ur.urlopen("https://baidu.com/") as hd:
    base_url = hd.geturl()  # final URL after redirects; base for relative links
    cat = hd.read().decode("utf-8")

soup = BeautifulSoup(cat, "html.parser")

for j, tag in enumerate(soup.find_all("img"), start=1):
    print(j)

    # An <img> without a src attribute has nothing to download; previously
    # this raised AttributeError outside the try and aborted the whole run.
    src = tag.get("src")
    if not src:
        print("爬取失败")
        continue

    # Resolve relative ("/img/a.png") and protocol-relative ("//host/a.png")
    # references against the page URL so requests receives an absolute URL.
    url = urljoin(base_url, src)
    parts = urlsplit(url)

    # Take the file name from the URL path only, so query strings and
    # fragments do not end up in the name written to disk.
    name = parts.path.split("/")[-1]

    # Skip non-HTTP sources (e.g. "data:" URIs) and URLs with no file name;
    # an empty name would make the target path equal to the folder itself.
    if parts.scheme not in ("http", "https") or not name:
        print("爬取失败")
        continue

    path = root + name
    if os.path.exists(path):
        print("文件已存在")
        continue

    try:
        os.makedirs(root, exist_ok=True)
        # The timeout keeps one unresponsive server from hanging the script.
        r = requests.get(url, headers=kv, cookies=cookie, timeout=10)
        # Do not store HTTP error pages (404, 500, ...) as image files.
        r.raise_for_status()
        with open(path, "wb") as f:
            f.write(r.content)
    except (requests.RequestException, OSError):
        # Best effort: report the failure and move on to the next image.
        print("爬取失败")
    else:
        print("文件保存成功")

懒得写注释