"""Download the wallpaper thumbnails from the first pages of 4kbizhi.com/mobile/.

Each listing page is fetched, every ``<li>`` entry is inspected for its own
image URL and caption, and the image is saved to ``images/<caption>.jpg``.
The total number of images actually written is printed at the end.
"""
import os
import re
from urllib.parse import urljoin

import requests
from lxml import etree

BASE_URL = 'https://www.4kbizhi.com/'
IMAGE_DIR = 'images'
PAGE_COUNT = 10
REQUEST_TIMEOUT = 10  # seconds; without it a stalled connection hangs forever

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0"
}

# Characters that cannot appear in a file name on Windows (a superset of the
# POSIX restrictions), plus control whitespace that captions may carry.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def _safe_filename(title):
    """Return *title* with characters that are illegal in file names replaced.

    Falls back to ``'untitled'`` when nothing usable is left.
    """
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', title).strip()
    return cleaned or 'untitled'


# Create the folder that receives the images (no error if it already exists).
os.makedirs(IMAGE_DIR, exist_ok=True)

count = 0
# Crawl the first PAGE_COUNT listing pages and save every image found.
for i in range(1, PAGE_COUNT + 1):
    # The first page has no index suffix; later pages are index_<n>.html.
    if i == 1:
        url = urljoin(BASE_URL, 'mobile/')
    else:
        url = urljoin(BASE_URL, 'mobile/index_{}.html'.format(i))

    try:
        response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Skip a page that cannot be fetched instead of aborting the whole run.
        print('第{}页请求失败: {}'.format(i, exc))
        continue

    response.encoding = "gbk"
    etro = etree.HTML(response.text)

    # Collect the list items that hold the images.
    image_elements = etro.xpath('/html/body/div/div/ul/li')
    print('第{}页有{}张图片'.format(i, len(image_elements)))

    for image_element in image_elements:
        # Use XPaths relative to this <li>; an absolute path would resolve to
        # the first image of the page on every iteration.
        srcs = image_element.xpath('.//a/img/@src')
        titles = image_element.xpath('.//p/text()')
        if not srcs or not titles:
            # Not an image entry (no picture or no caption): nothing to save.
            continue

        # urljoin copes with root-relative, relative and absolute src values.
        url_01 = urljoin(BASE_URL, srcs[0])
        title = titles[0]

        try:
            image_response = requests.get(
                url=url_01, headers=HEADERS, timeout=REQUEST_TIMEOUT
            )
            image_response.raise_for_status()
        except requests.RequestException as exc:
            # Do not write an error page to disk as if it were an image.
            print(title, "下载失败:", exc)
            continue
        print(url_01)

        # Save the image bytes into the local images folder.
        file_path = os.path.join(IMAGE_DIR, '{}.jpg'.format(_safe_filename(title)))
        with open(file_path, 'wb') as f:
            f.write(image_response.content)

        # Count only images that were actually written.
        count += 1
        print(title, "下载成功")

    print('第{}页爬取完成'.format(i))

print('总共爬取了{}张图片'.format(count))
# 爬取网络壁纸 (Scraping wallpapers from the web)
# 于 2024-01-02 14:38:49 首次发布 (first published 2024-01-02 14:38:49)