Python爬取必应搜索首页图片

最新推荐文章于 2023-12-05 20:27:31 发布

觉皇嵌入式

最新推荐文章于 2023-12-05 20:27:31 发布

阅读量1.3k

点赞数 3

文章标签：爬虫 Python 必应

本文链接：https://blog.csdn.net/qq153471503/article/details/89476026

版权

爬取必应搜索首页图片

安装库：

pip install beautifulsoup4
pip install lxml
pip install requests

"""
爬取每日必应首页图片：

必应首页图片一共七张
"""

import requests
from bs4 import BeautifulSoup
from requests.exceptions import RequestException
import re
import os


def get_page(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response text when the server answers with HTTP 200; ``None``
        for any other status code or when a ``RequestException`` is raised.
    """
    # Browser-like User-Agent header sent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
    }

    # Keep the try body limited to the call that can raise.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        return None

    if response.status_code == 200:
        return response.text
    return None


def download(url, path, fname, timeout=10):
    """Download *url* and save it as ``path/fname``.

    Prints a success message after the file is written, or a failure message
    when the request fails or returns an error status.

    Args:
        url: Address of the resource to download.
        path: Directory to save into; created if it does not exist.
        fname: File name to use inside *path*.
        timeout: Seconds to wait for the server before giving up.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except RequestException:
        # A network error on one file should not abort the caller's loop.
        print('faild: {}.'.format(fname))
        return

    # Response truthiness is False for 4xx/5xx status codes.
    if not response:
        print('faild: {}.'.format(fname))
        return

    # Make sure the target directory exists before opening the file;
    # an empty path means "current directory" and needs no creation.
    if path:
        os.makedirs(path, exist_ok=True)

    with open(os.path.join(path, fname), 'wb') as f:
        f.write(response.content)
    print('download: {} successful.'.format(fname))


# Request archive indices 0..7, one image per request, and save each image.
for i in range(8):
    url = 'https://cn.bing.com/HPImageArchive.aspx?idx={}&n=1'.format(i)
    html = get_page(url)
    if html is None:
        # get_page returns None on a request error or a non-200 status.
        print('skip idx {}: archive page unavailable.'.format(i))
        continue

    soup = BeautifulSoup(html, 'lxml')
    url_tag = soup.find(name='url')
    if url_tag is None or not url_tag.string:
        # The response did not contain a usable <url> element.
        print('skip idx {}: no image url in response.'.format(i))
        continue

    text = url_tag.string
    # Raw string: same pattern as before, without the invalid "\?" escape
    # in a normal string literal.
    id_match = re.match(r'^/th\?id=(.*?)&', text)
    if id_match is None:
        print('skip idx {}: unexpected image url {!r}.'.format(i, text))
        continue

    img_url = 'https://cn.bing.com' + text
    img_name = id_match.group(1)
    download(img_url, 'E:/wallpaper/bing', img_name)