The data scraping has problems.

# coding: utf-8
import os

import requests
from bs4 import BeautifulSoup
from lxml import html

# Scrape scenery pictures


def header(referer):
    # Request headers for the image host; the Referer is what gets the request
    # past the site's anti-hotlinking check.
    headers = {
        'Host': 'i.meizitu.net',
        'Pragma': 'no-cache',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/59.0.3071.115 Safari/537.36',
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Referer': referer,
    }
    return headers


def get_html(url, urls):
    # Collect the gallery links from one listing page.
    datas = html.fromstring(requests.get(url).content)
    for i in datas.xpath('//ul[@id="pins"]/li/a/@href'):
        urls.append(i)

'''
# Both approaches can collect the gallery URLs, but this one, although simpler,
# also picks up a lot of unrelated links and ends up much messier.
def get_html(url, urls):
    r = requests.get(url).text
    soup = BeautifulSoup(r, 'html.parser')
    for url in soup.find_all('a'):
        urls.append(url.get('href'))
'''
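
# A narrower BeautifulSoup variant (only a sketch, assuming the same ul#pins
# structure that the XPath above relies on): limiting the selection to that
# list avoids the unrelated links that make the plain find_all('a') so messy.
'''
def get_html_bs(url, urls):
    soup = BeautifulSoup(requests.get(url).text, 'html.parser')
    for a in soup.select('ul#pins li a'):
        urls.append(a.get('href'))
'''
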
def get_html_pic(urls, titles, nums):
    # For every gallery URL, collect its title and its total page count.
    for i in list(urls):
        try:
            datas = html.fromstring(requests.get(i).content)
            name = datas.xpath('//h2[@class="main-title"]/text()')[0]
            num = datas.xpath('//div[@class="pagenavi"]/a[last()-1]/span/text()')[0]
            titles.append(name)
            nums.append(num)
        except Exception:
            # Drop the failed gallery so urls, titles and nums stay aligned.
            urls.remove(i)

def get_pic_url(urls, nums, pic_urls, titles):
    # Download every picture of every gallery; urls, titles and nums are
    # aligned by index.
    path = "F:\\python\\图片\\"
    for x, i in enumerate(urls):
        name = titles[x]
        dirname = path + name
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        # Each gallery has int(nums[x]) pages with one picture per page.
        for j in range(int(nums[x])):
            page_url = '{}/{}'.format(i, j + 1)
            end_datas = html.fromstring(requests.get(page_url).content)
            jpgLink = end_datas.xpath('//div[@class="main-image"]/p/a/img/@src')[0]
            pic_urls.append(jpgLink)
            try:
                # Save into the gallery folder instead of opening the folder path itself.
                with open(os.path.join(dirname, jpgLink.split('/')[-1]), "wb") as jpg:
                    jpg.write(requests.get(jpgLink, headers=header(jpgLink)).content)
                print(u'%s: picture %d downloaded' % (name, j + 1))
            except Exception:
                pass


def main():
    urls = []
    pages = int(input("Number of list pages to scrape: "))
    # Assumed listing-page URL pattern; adjust it to the actual site being scraped.
    base_url = 'http://www.mzitu.com/page/{}/'
    for i in range(pages):
        get_html(base_url.format(i + 1), urls)
    titles = []
    nums = []
    get_html_pic(urls, titles, nums)
    pic_urls = []
    get_pic_url(urls, nums, pic_urls, titles)


if __name__ == '__main__':
    main()
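
If downloads still fail, it helps to check the anti-hotlinking step in isolation: fetch one image URL without any headers and once more through header(). A minimal sketch, run after the definitions above; the image URL is only a placeholder and has to be replaced with a real link collected by the script.

test_url = 'http://i.meizitu.net/example.jpg'  # placeholder, replace with a real image link
print(requests.get(test_url).status_code)                            # typically 403 without a Referer
print(requests.get(test_url, headers=header(test_url)).status_code)  # expected 200 with the headers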
