import urllib.request
from lxml import etree
plate = "4kyouxi" # board (category) on pic.netbian.com to crawl
pages = 1 # number of listing pages to crawl
def image_download():
    """Crawl `pages` listing pages of the `plate` board on pic.netbian.com
    and save each pictured wallpaper to a local folder.

    Reads the module-level globals `plate` (board name) and `pages`
    (number of listing pages). Files are saved as
    ``第<page>页第<index>张.jpg`` under the hard-coded save directory.
    A failure on one page is reported and that page is skipped; the
    original silently swallowed every error.
    """
    import os

    base = "http://pic.netbian.com"
    # NOTE(review): save path is hard-coded to one user's desktop — the
    # original string literal ended in a lone backslash ("...image\"),
    # which escaped the closing quote and was a SyntaxError. Fixed to \\.
    save_dir = "C:\\Users\\MichstaBe\\Desktop\\image\\"
    os.makedirs(save_dir, exist_ok=True)

    for x in range(pages):
        # Page 1 is index.html; later pages are index_2.html, index_3.html, ...
        if x == 0:
            url = base + "/" + plate + "/index.html"
        else:
            url = base + "/" + plate + "/index_" + str(x + 1) + ".html"
        try:
            # Site serves GBK-encoded HTML.
            urldata = urllib.request.urlopen(url).read().decode("gbk")
            tree = etree.HTML(urldata)
            # Relative hrefs to each picture's detail page.
            urllist = tree.xpath("//ul[@class='clearfix']/li/a/@href")
            for y in range(len(urllist)):
                image_url = base + urllist[y]
                image_data = urllib.request.urlopen(image_url).read().decode("gbk")
                detail = etree.HTML(image_data)
                title = detail.xpath("//h1/text()")
                src = detail.xpath("//div[@class='photo-pic']/a/img/@src")
                download_url = base + str(src[0])
                # Keep the original filename scheme: 第<page>页第<y>张.jpg
                localfile = save_dir + "第" + str(x + 1) + "页" + "第" + str(y) + "张" + ".jpg"
                urllib.request.urlretrieve(download_url, filename=localfile)
                print("正在爬取并保存:" + str(title[0]) + "\n")
            # time.sleep(0.5)  # uncomment to throttle requests
        except Exception as err:
            # Report and skip the page rather than hiding the error.
            print("skipped " + url + ": " + repr(err))
image_download()