千叶网抓取图片
说明:抓取千叶网首页某一分类下加载的所有照片,即 url_source 链接里面的所有图片
利用xpath方式解析网址
将图片下载到本地指定文件夹
代码如下:
# Imports (stdlib first, then third-party, per PEP 8)
import os

import requests
from lxml import html

# Landing page of the 5120x2880 wallpaper category on qianye88.com.
url_source = 'http://qianye88.com/5120x2880/'
# Impersonate a desktop Chrome browser so the site serves regular pages.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}
# Page-fetching helper
def get_links(url):
    """Download *url* and return an lxml element tree ready for xpath queries.

    Uses the module-level ``headers`` so every request carries the same
    browser User-Agent.
    """
    response = requests.get(url, headers=headers).content
    sel = html.fromstring(response)
    return sel
# Parse the category page ONCE and reuse the tree for both xpath queries.
# (The original fetched the page for the links, then referenced an
# undefined name `sel` for the titles — a NameError.)
sel = get_links(url_source)
url_first = sel.xpath('//a[@class="image"]/@href')   # relative detail-page links
name_picture = sel.xpath('//a[@class="title"]/@title')  # matching titles
#print (name_picture)

# Each thumbnail links to a detail page that holds the large image.
# Without logging in, the ultra-high-resolution image cannot be downloaded.
# Local target folder; raw-ish double-backslash form avoids invalid
# escape sequences such as "\C" in the original literal.
root = "D:\\CP\\Python\\Exercise\\jupyter_notebook\\pictureDownload\\"
for url in url_first:
    url_total = 'http://qianye88.com' + url
    url_sec = get_links(url_total)
    # Image URL and page title live in the detail page's content column.
    url_name = url_sec.xpath('//div[@class="content-left layout fl"]/img/@src')
    pic_name = url_sec.xpath('//div[@class="content-left layout fl"]/h1/text()')
    if not url_name or not pic_name:
        # Layout changed or page is empty — skip instead of IndexError-ing.
        continue
    path = root + pic_name[0] + '.jpg'
    try:
        # exist_ok avoids the separate existence check the original did.
        os.makedirs(root, exist_ok=True)
        if not os.path.exists(path):
            print(url_name[0])
            r = requests.get(url_name[0], headers=headers)
            # `with` closes the file; the original's extra f.close() was redundant.
            with open(path, 'wb') as f:
                f.write(r.content)
            print("文件保存成功")
        else:
            print("文件已存在")
    except Exception:
        # Narrowed from a bare `except:`; message typo 爬去→爬取 fixed.
        print("爬取失败")