python爬虫爬取高清图片——爬虫入门

最新推荐文章于 2024-07-30 17:23:09 发布

摸摸头发在不在

最新推荐文章于 2024-07-30 17:23:09 发布

阅读量1k

点赞数 2

文章标签： python 爬虫 开发语言 pycharm

本文链接：https://blog.csdn.net/weixin_40027143/article/details/127740628

版权

自动抓取某图片网站高清壁纸并下载保存

使用requests请求网页，bs4解析数据

话不多说直接看代码，刚学不久欢迎指点

# -*- coding: utf-8 -*-
#@Time : 2022/11/7 15:22
#@Author : 摸摸头发在不在
#@File : getimg.py
#@Software: PyCharm

'''    思路
1.拿到主页面的源代码
2.提取子页面的链接地址 href
3.通过href拿到子页面的url，前往子页面中找到图片下载地址
4.下载图片
'''

import os
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

def main():
    '''
    Crawl the wallpaper listing page and save one image per album.

    Steps: fetch the listing page, collect the album links inside the
    ``infinite_scroll`` container, open each album page, locate the image in
    its ``big-pic`` block and write the bytes to ``img/<page name>.jpg``.
    A failure on a single album is reported and skipped so that the
    remaining albums are still processed.
    '''
    gaveurl = "https://www.umei.cc/bizhitupian/fengjingbizhi/"
    save_dir = "img"
    # open() does not create missing directories, so make sure the target exists.
    os.makedirs(save_dir, exist_ok=True)

    html = get_html(gaveurl)  # source of the listing page
    page = BeautifulSoup(html, "html.parser")
    container = page.find("div", id="infinite_scroll")
    if container is None:
        # find() returns None when the element is absent; nothing to iterate.
        print("album list not found:", gaveurl)
        return

    for a in container.find_all('a', class_='img_album_btn'):
        hf = a.get("href")
        if not hf:
            continue  # anchor without a link target
        _save_album_image(gaveurl, hf, save_dir)
        time.sleep(1)  # throttle: wait one second between albums


def _save_album_image(list_url, href, save_dir):
    '''
    Download the image shown on one album page into ``save_dir``.

    :param list_url: URL of the listing page, used to resolve ``href``
    :param href: link to the album page as found in the listing
    :param save_dir: existing directory that receives the ``.jpg`` file
    :return: True when the image was written, False when the album was skipped
    '''
    hf_html_child = href.split("/")[-1]  # last path segment, reused as file name
    if not hf_html_child:
        print("skip (no page name in link):", href)
        return False
    # urljoin resolves relative, root-relative and absolute hrefs alike.
    hf_html = urljoin(list_url, href)

    try:
        chile_html = get_html(hf_html)
    except requests.RequestException as err:
        print("skip (page request failed):", hf_html, err)
        return False

    chile_page = BeautifulSoup(chile_html, "html.parser")
    big_pic = chile_page.find("div", class_="big-pic")
    img_tag = big_pic.find("img") if big_pic is not None else None
    src = img_tag.get("src") if img_tag is not None else None
    if not src:
        print("skip (no image found):", hf_html)
        return False

    try:
        resp = requests.get(urljoin(hf_html, src), timeout=10)
        # Reject 4xx/5xx so an error page is never saved as a .jpg file.
        resp.raise_for_status()
    except requests.RequestException as err:
        print("skip (image request failed):", src, err)
        return False

    with open(os.path.join(save_dir, hf_html_child + ".jpg"), mode="wb") as f:
        f.write(resp.content)  # raw image bytes
    print("over!!!", hf_html_child)
    return True


def get_html(url, timeout=10):
    '''
    Fetch a page and return its source decoded as UTF-8.

    :param url: address of the page to request
    :param timeout: seconds to wait for the server before giving up
    :return: page source as text
    :raises requests.RequestException: on connection errors, timeouts,
        or an HTTP error status (4xx/5xx)
    '''
    header = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36"
    }
    # Without a timeout requests may wait on an unresponsive server forever.
    resp = requests.get(url, headers=header, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error page to the parser.
    resp.raise_for_status()
    resp.encoding = 'utf-8'  # force UTF-8 decoding of the body
    return resp.text


# Run the crawler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()