爬取必应搜索首页图片
安装库:
pip install beautifulsoup4
pip install lxml
pip install requests
"""
爬取每日必应首页图片:
必应首页图片一共八张(idx 取 0 到 7,与下方 range(8) 的循环一致)
"""
import requests
from bs4 import BeautifulSoup
from requests.exceptions import RequestException
import re
import os
def get_page(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout the request can block indefinitely on a stalled
            connection.

    Returns:
        The response text when the status code is exactly 200; ``None``
        for any other status or when the request raises a
        ``RequestException`` (connection error, timeout, ...).
    """
    # Send a desktop-browser User-Agent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None
def download(url, path, fname, timeout=10):
    """Download ``url`` and store the raw bytes as ``path/fname``.

    Prints a success line after the file is written, or a failure line when
    the request raises a ``RequestException`` or the response status is an
    error (``response.ok`` is false).

    Args:
        url: Address of the resource to download.
        path: Directory to save into; created if it does not exist yet.
        fname: File name to use inside ``path``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except RequestException:
        # Treat a network failure like a bad status instead of aborting the
        # caller's whole run.
        response = None

    # ``response.ok`` is the same check the truthiness of a Response performs.
    if response is None or not response.ok:
        print('faild: {}.'.format(fname))
        return

    # open() does not create missing directories, so make sure it exists.
    os.makedirs(path, exist_ok=True)
    with open(os.path.join(path, fname), 'wb') as f:
        f.write(response.content)
    print('download: {} successful.'.format(fname))
# Request the archive entries idx=0..7 one at a time (n=1) and save each image.
# NOTE(review): idx is presumably the day offset from today - confirm against
# the HPImageArchive endpoint's behaviour.
for i in range(8):
    url = 'https://cn.bing.com/HPImageArchive.aspx?idx={}&n=1'.format(i)
    html = get_page(url)
    if html is None:
        # get_page returns None on any failure; skip this entry rather than
        # handing None to the parser.
        print('skip idx {}: archive page unavailable.'.format(i))
        continue

    soup = BeautifulSoup(html, 'lxml')
    url_tag = soup.find(name='url')
    text = url_tag.string if url_tag is not None else None
    if not text:
        print('skip idx {}: no image url in response.'.format(i))
        continue

    # The <url> element holds a site-relative path, so prefix the host.
    img_url = 'https://cn.bing.com' + text

    # The file name is the value of the ``id`` query parameter, i.e. whatever
    # sits between "/th?id=" and the first "&".
    matched = re.match(r'^/th\?id=(.*?)&', text)
    if matched is None:
        print('skip idx {}: unexpected image path.'.format(i))
        continue

    download(img_url, 'E:/wallpaper/bing', matched.group(1))
效果
ends…