import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Scrape the first 10 pages of https://www.4kbizhi.com/mobile/, save every
# image into the ./images folder, and print the final image count.

BASE_URL = 'https://www.4kbizhi.com/'
# Hoisted out of the loop: the headers never change between requests.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0"
}


def _safe_filename(title):
    """Replace characters that are invalid in Windows/Unix file names.

    The page title is user-visible text and may contain '/', '?', '*', etc.,
    which would crash open() or write outside the images folder.
    """
    return re.sub(r'[\\/:*?"<>|]', '_', title).strip() or 'untitled'


def main():
    """Download the first 10 pages of wallpapers and report the total count."""
    # exist_ok avoids the crash the old os.mkdir had on a second run.
    os.makedirs('images', exist_ok=True)
    count = 0
    for i in range(1, 11):
        # Page 1 has no index suffix; pages 2+ are index_{n}.html.
        if i == 1:
            url = urljoin(BASE_URL, 'mobile/')
        else:
            url = urljoin(BASE_URL, 'mobile/index_{}.html'.format(i))
        # timeout keeps a stalled server from hanging the script forever.
        response = requests.get(url=url, headers=HEADERS, timeout=10)
        response.encoding = "gbk"  # the site serves GBK-encoded pages
        soup = BeautifulSoup(response.text, 'html.parser')
        # Thumbnail list items on the page.
        image_elements = soup.select('div.piclist.clearfix ul.item li')
        print('第{}页有{}张图片'.format(i, len(image_elements)))
        for image_element in image_elements:
            imgs = image_element.select('div:nth-of-type(3) div:nth-of-type(1) ul li a img')
            titles = image_element.select('p')
            if not imgs or not titles:
                continue  # skip malformed list items instead of raising IndexError
            # urljoin avoids the 'site.com//path' double slash that the old
            # string concatenation produced for root-relative src values.
            image_url = urljoin(BASE_URL, imgs[0]['src'])
            title = _safe_filename(titles[0].get_text())
            image_bytes = requests.get(url=image_url, headers=HEADERS, timeout=10).content
            print(image_url)
            count += 1
            # Save the downloaded bytes under the (sanitized) page title.
            with open('images/{}.jpg'.format(title), 'wb') as f:
                f.write(image_bytes)
            print(title, "下载成功")
        print('第{}页爬取完成'.format(i))
    print('总共爬取了{}张图片'.format(count))


if __name__ == "__main__":
    main()
# bs4 web-wallpaper scraper (bs4爬取网络壁纸)
# First published 2024-01-02 14:43:45 (于 2024-01-02 14:43:45 首次发布)