Python 用 XPath 爬取 10 页网站图片

最新推荐文章于 2024-07-31 00:53:52 发布

赤坂·龙之介

最新推荐文章于 2024-07-31 00:53:52 发布

阅读量888

点赞数 1

分类专栏： python爬虫

本文链接：https://blog.csdn.net/Nagato_Yuki_SOS/article/details/107549229

版权

python爬虫专栏收录该内容

9 篇文章 1 订阅

订阅专栏

#爬取网站图片
import requests
from lxml import etree  
import os

# Request headers: every request is sent with a desktop-browser User-Agent.
headers={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36"}

# Directory the downloaded images are stored in.
root="F://z yukinoshita/"

# Listing-page URL; the 1-based page number is appended to it.
PAGE_URL = "https://www.zerochan.net/Yukinoshita+Yukino?p="
# Prefix that is combined with a thumbnail's file name to form the image URL.
FULL_IMAGE_PREFIX = "https://static.zerochan.net/Yukinoshita.Yukino.full."
# XPath selecting the ``src`` attribute of every thumbnail on a listing page.
THUMB_XPATH = '//*[@id="thumbs2"]/li/a/img/@src'
# Number of listing pages to crawl (pages 1..PAGE_COUNT).
PAGE_COUNT = 10
# Seconds to wait on a request before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30


def full_image_url(thumb_src):
    """Return the download URL derived from a thumbnail ``src`` value.

    Only the last ``/``-separated component of ``thumb_src`` is used; it is
    appended to ``FULL_IMAGE_PREFIX``.
    """
    return FULL_IMAGE_PREFIX + thumb_src.split('/')[-1]


def download_image(image_url, path):
    """Download ``image_url`` and write the response body to ``path``.

    The response is fetched and its status checked *before* the file is
    opened, so a failed request never leaves an empty file behind that a
    later run would mistake for an already-downloaded image.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an HTTP error status.
        OSError: the file could not be written.
    """
    con = requests.get(image_url, headers=headers, timeout=REQUEST_TIMEOUT)
    con.raise_for_status()
    with open(path, 'wb') as f:
        f.write(con.content)


def main():
    """Crawl the listing pages and save every image not already on disk.

    One status line is printed per image; the number in it is a running
    count of all images seen so far (saved, already present, or failed).
    """
    # Create the target directory once, up front, rather than on every page.
    os.makedirs(root, exist_ok=True)

    i = 0  # running count of images processed
    for x in range(1, PAGE_COUNT + 1):
        try:
            r = requests.get(PAGE_URL + str(x), headers=headers,
                             timeout=REQUEST_TIMEOUT)
            r.raise_for_status()
        except requests.RequestException:
            # A failed listing page should not abort the remaining pages.
            print("页面请求失败"+str(x))
            continue

        page = etree.HTML(r.text)
        for thumb_src in page.xpath(THUMB_XPATH):
            html_1 = full_image_url(thumb_src)
            # The local file name is the last path component of the image URL.
            path = root + html_1.split('/')[-1]

            i += 1
            if os.path.exists(path):
                print("文件已存在"+str(i))
                continue
            try:
                download_image(html_1, path)
            except (requests.RequestException, OSError):
                print("爬取失败"+str(i))
            else:
                print("爬取成功"+str(i))


if __name__ == "__main__":
    main()