"""Download the post thumbnail images listed on kanxiaojiejie.tk index pages.

Each index page is fetched with a browser-like User-Agent, the ``<img>``
elements inside the post grid are located with XPath, and every image is
saved under ``./小姐姐/`` using its ``alt`` text as the file name.
"""
# The usual imports.
import os
import re
import shutil
import urllib.parse
import urllib.request

# Directory the images are written to.
SAVE_DIR = './小姐姐/'
BASE_URL = 'https://www.kanxiaojiejie.tk/'
# Seconds to wait on a single HTTP request before giving up.
TIMEOUT = 30

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
}

# Characters that are path separators or are rejected by common filesystems.
UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def build_page_url(page):
    """Return the index URL for *page*; page 1 is the bare site root."""
    if page == 1:
        return BASE_URL
    return f'{BASE_URL}page/{page}'


def safe_filename(name):
    """Return *name* with characters that are unsafe in a file name replaced.

    The alt text comes from the remote page, so it may contain ``/`` or
    other characters that would either fail on save or point outside
    ``SAVE_DIR``. Falls back to ``'untitled'`` when nothing usable is left.
    """
    cleaned = UNSAFE_FILENAME_CHARS.sub('_', name).strip(' .')
    return cleaned or 'untitled'


def image_suffix(src):
    """Return the file extension (without the dot) of the image URL *src*.

    Only the URL path is inspected, so a query string or fragment does not
    leak into the extension. ``'jpg'`` is used when the path has none.
    """
    path = urllib.parse.urlparse(src).path
    extension = os.path.splitext(path)[1].lstrip('.')
    return extension or 'jpg'


def fetch_html(url):
    """Fetch *url* with the shared headers and return the body decoded as UTF-8."""
    request = urllib.request.Request(url=url, headers=headers)
    with urllib.request.urlopen(request, timeout=TIMEOUT) as response:
        return response.read().decode('utf-8')


def extract_images(html):
    """Return ``(src, name)`` pairs for every image in the post grid of *html*.

    Both attributes are read from the same ``<img>`` element so that a
    missing ``alt`` or ``src`` cannot shift names onto the wrong image.
    Images without ``src`` are skipped; a missing ``alt`` falls back to the
    file stem of the image URL.
    """
    # Imported here so the pure helpers in this file stay importable on
    # machines where lxml is not installed.
    from lxml import etree

    tree = etree.HTML(html)
    if tree is None:
        return []
    images = tree.xpath('//div[@class="gridsoul-posts gridsoul-posts-grid"]/div//img')
    pairs = []
    for image in images:
        src = image.get('src')
        if not src:
            continue
        stem = os.path.splitext(os.path.basename(urllib.parse.urlparse(src).path))[0]
        pairs.append((src, image.get('alt') or stem))
    return pairs


def download_image(src, destination):
    """Stream the image at *src* into the file *destination*.

    The request carries the same headers as the page request; a plain
    ``urlretrieve`` would announce itself as Python-urllib instead.
    """
    request = urllib.request.Request(url=src, headers=headers)
    with urllib.request.urlopen(request, timeout=TIMEOUT) as response, \
            open(destination, 'wb') as output:
        shutil.copyfileobj(response, output)


def main(first_page=1, last_page=1):
    """Download all grid images from pages *first_page*..*last_page* inclusive.

    The defaults reproduce the original behaviour of crawling page 1 only.
    A failed image download is reported and skipped rather than aborting
    the rest of the page.
    """
    # Create the folder the images are saved into.
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Several pages may be crawled, so the URL depends on the page number.
    for page in range(first_page, last_page + 1):
        url = build_page_url(page)
        # Send the request and read the response.
        html = fetch_html(url)

        # Locate the images with XPath and save each one.
        for src, name in extract_images(html):
            # Resolve relative or protocol-relative image URLs against the page.
            src = urllib.parse.urljoin(url, src)
            destination = os.path.join(
                SAVE_DIR, f'{safe_filename(name)}.{image_suffix(src)}'
            )
            try:
                download_image(src, destination)
            except OSError as exc:  # URLError, HTTPError and timeouts are OSErrors
                print(f'{name}---下载失败: {exc}')
                continue
            print(f'{name}---下载完成')
        print(f'*************当前在{page}页*************')


if __name__ == '__main__':
    main()
# 爬取点网站图片
# 于 2023-05-16 20:27:37 首次发布