python实战：爬取优美图库，将图片保存到本地

马虎的程序猿

已于 2022-02-13 17:27:27 修改

阅读量2.9k

点赞数 1

文章标签： python 开发语言 后端

于 2021-11-08 22:55:58 首次发布

本文链接：https://blog.csdn.net/weixin_45195493/article/details/121218285

版权

import os
import time
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

# 1. 解析主页面源代码,提取子页面url
# 2. 通过子页面拿取内容,找到图片的下载地址
# 3. 下载图片

# Listing page whose thumbnails link to the individual wallpaper pages.
url = 'https://www.umei.cc/bizhitupian/weimeibizhi/'

# Seconds to wait on any single HTTP request. Without a timeout, requests
# blocks indefinitely when the server stops responding.
REQUEST_TIMEOUT = 10

# Local directory the downloaded images are written into.
IMG_DIR = 'img'


def child_page_url(base_url, href):
    """Return the absolute URL of a child page linked from the listing page.

    Args:
        base_url: URL of the listing page the link was found on.
        href: Value of the link's ``href`` attribute; may be relative,
            root-relative or already absolute.

    Returns:
        The absolute URL as a string.
    """
    # urljoin handles every href form, unlike slicing a fixed path segment
    # out of the href, which breaks as soon as the path depth changes.
    return urljoin(base_url, href)


def image_filename(src):
    """Return the file name to save an image under.

    Args:
        src: Absolute or relative URL of the image.

    Returns:
        The last component of the URL path, without query string or
        fragment. Empty string when the path ends with ``/``.
    """
    return urlsplit(src).path.rsplit('/', 1)[-1]


def fetch_soup(page_url):
    """Download *page_url* and return it parsed with BeautifulSoup.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-2xx HTTP status.
    """
    resp = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    resp.encoding = 'utf-8'  # force UTF-8 so the text is not garbled
    return BeautifulSoup(resp.text, 'html.parser')


def download_image(child_url):
    """Save the image shown on one child page into ``IMG_DIR``.

    Args:
        child_url: Absolute URL of the child page.

    Returns:
        The saved file name, or ``None`` when the page contains no usable
        ``<p align="center"> ... <img src=...>`` element.

    Raises:
        requests.RequestException: when the page or the image cannot be
            fetched.
    """
    child_page = fetch_soup(child_url)

    # find('tag', attribute='value'); for the class attribute use
    # find('p', class_='...') or find('p', attrs={'class': '...'}).
    container = child_page.find('p', align='center')
    img_tag = container.find('img') if container is not None else None
    src = img_tag.get('src') if img_tag is not None else None
    if not src:
        return None

    # The src attribute may be relative; resolve it against the page URL.
    src = urljoin(child_url, src)
    img_name = image_filename(src)
    if not img_name:
        return None

    img_resp = requests.get(src, timeout=REQUEST_TIMEOUT)
    img_resp.raise_for_status()  # never store an error page as an image

    with open(os.path.join(IMG_DIR, img_name), mode='wb') as f:
        f.write(img_resp.content)  # raw bytes of the image
    return img_name


def main():
    """Crawl the listing page and download the image of every child page.

    Child pages that fail to download or contain no image are reported and
    skipped, so one bad page does not abort the whole run.

    Raises:
        requests.RequestException: when the listing page cannot be fetched.
        RuntimeError: when the listing page has no ``div.TypeList`` element.
    """
    main_page = fetch_soup(url)

    type_list = main_page.find('div', class_='TypeList')
    if type_list is None:
        raise RuntimeError(
            "no <div class='TypeList'> found on " + url
            + '; the page layout may have changed'
        )

    # open() does not create missing directories.
    os.makedirs(IMG_DIR, exist_ok=True)

    for link in type_list.find_all('a'):
        href = link.get('href')
        if not href:
            continue
        page_url = child_page_url(url, href)

        try:
            img_name = download_image(page_url)
        except requests.RequestException as exc:
            print('skipped', page_url, exc)
            continue
        if img_name is None:
            print('skipped', page_url, 'no image found')
            continue

        print('over!!', img_name)
        # Throttle requests; done after the file is closed so no handle is
        # held open while sleeping.
        time.sleep(1)

    print('结束')


if __name__ == '__main__':
    main()