# _*_ coding:utf-8 _*_
import urllib
import urllib2
import re
from lxml import etree
#遍历所有471个图片页面,从中拿到每一个美女的html页面
def allurl(url,headers):
for a in range(1,472):
Newurl = url + str(a) +'.html' #拼接471个页面
print Newurl
request = urllib2.Request(Newurl, headers=headers)
response = urllib2.urlopen(request).read()
pattern = etree.HTML(response) #转化为lxml页面
link_list = pattern.xpath('//p[@class="list_h"]/a/@href') #提取每一个美女的html
for link in link_list:
Newurl = "http://www.uumnt.com" + link
#print Newurl
allgirl(Newurl,headers = headers)
def allgirl(url, headers):
    """Fetch one gallery page, work out its page count, and crawl its pages.

    url     -- gallery entry URL ending in ".html"
    headers -- HTTP headers forwarded to every request
    """
    request = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(request).read()
    tree = etree.HTML(response)
    # The 7th pager anchor points at the last page; its URL embeds the page count.
    last_page_links = tree.xpath('//div[@class="page"]/a[7]/@href')
    base_url = url[:-5] + '_'  # strip ".html"; image pages are <base>_<n>.html
    for href in last_page_links:
        # Parse the page count from e.g. ".../12345_15.html".  The original
        # slice href[-7:-5] only worked for exactly two-digit counts; this
        # regex handles any number of digits.
        match = re.search(r'_(\d+)\.html$', href)
        if match:
            # Pass the page count and base URL on for per-page processing.
            allimgurl(match.group(1), base_url, headers)
def allimgurl(num, url, headers):
    """Visit every image page of one gallery.

    num     -- page count as a numeric string
    url     -- gallery base URL ending in "_" (pages are <url><n>.html)
    headers -- HTTP headers forwarded downstream
    """
    # range() has an exclusive upper bound: the original range(1, int(num))
    # silently skipped the last page of every gallery.
    for page in range(1, int(num) + 1):
        getimgurl(url + str(page) + '.html', headers)
def getimgurl(url,headers):
request = urllib2.Request(url,headers = headers)
response = urllib2.urlopen(request).read()
pattern = etree.HTML(response)
link_list = pattern.xpath('//div[@class="bg-white p15 center imgac clearfix"]/a/img/@src')
name_list = pattern.xpath('//div[@class="bg-white p15 center imgac clearfix"]/a/img/@alt')
for link,name in zip(link_list, name_list):
saveimg(link,name,headers)
print link + 'is saving '
#print name
#print link_list
def saveimg(url,name,headers):
request = urllib2.Request(url,headers = headers)
response = urllib2.urlopen(request).read()
#print url
#print name
with open('/home/cgs/python/uumnt/' + name + '.jpg','wb') as f:
f.write(response)
print name + "is save ok"
if __name__ == "__main__":
    # Entry point: crawl every list page starting from the base listing URL.
    base_url = "http://www.uumnt.com/meinv/list_"
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0",
        "Referer": "https://newimg.uumnt.com/",
    }
    allurl(base_url, request_headers)
import urllib
import urllib2
import re
from lxml import etree
#遍历所有471个图片页面,从中拿到每一个美女的html页面
def allurl(url,headers):
for a in range(1,472):
Newurl = url + str(a) +'.html' #拼接471个页面
print Newurl
request = urllib2.Request(Newurl, headers=headers)
response = urllib2.urlopen(request).read()
pattern = etree.HTML(response) #转化为lxml页面
link_list = pattern.xpath('//p[@class="list_h"]/a/@href') #提取每一个美女的html
for link in link_list:
Newurl = "http://www.uumnt.com" + link
#print Newurl
allgirl(Newurl,headers = headers)
# NOTE(review): duplicate of the earlier allgirl definition in this file.
def allgirl(url, headers):
    """Fetch one gallery page, work out its page count, and crawl its pages.

    url     -- gallery entry URL ending in ".html"
    headers -- HTTP headers forwarded to every request
    """
    request = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(request).read()
    tree = etree.HTML(response)
    # The 7th pager anchor points at the last page; its URL embeds the page count.
    last_page_links = tree.xpath('//div[@class="page"]/a[7]/@href')
    base_url = url[:-5] + '_'  # strip ".html"; image pages are <base>_<n>.html
    for href in last_page_links:
        # Parse the page count from e.g. ".../12345_15.html".  The original
        # slice href[-7:-5] only worked for exactly two-digit counts; this
        # regex handles any number of digits.
        match = re.search(r'_(\d+)\.html$', href)
        if match:
            # Pass the page count and base URL on for per-page processing.
            allimgurl(match.group(1), base_url, headers)
# NOTE(review): duplicate of the earlier allimgurl definition in this file.
def allimgurl(num, url, headers):
    """Visit every image page of one gallery.

    num     -- page count as a numeric string
    url     -- gallery base URL ending in "_" (pages are <url><n>.html)
    headers -- HTTP headers forwarded downstream
    """
    # range() has an exclusive upper bound: the original range(1, int(num))
    # silently skipped the last page of every gallery.
    for page in range(1, int(num) + 1):
        getimgurl(url + str(page) + '.html', headers)
def getimgurl(url,headers):
request = urllib2.Request(url,headers = headers)
response = urllib2.urlopen(request).read()
pattern = etree.HTML(response)
link_list = pattern.xpath('//div[@class="bg-white p15 center imgac clearfix"]/a/img/@src')
name_list = pattern.xpath('//div[@class="bg-white p15 center imgac clearfix"]/a/img/@alt')
for link,name in zip(link_list, name_list):
saveimg(link,name,headers)
print link + 'is saving '
#print name
#print link_list
def saveimg(url,name,headers):
request = urllib2.Request(url,headers = headers)
response = urllib2.urlopen(request).read()
#print url
#print name
with open('/home/cgs/python/uumnt/' + name + '.jpg','wb') as f:
f.write(response)
print name + "is save ok"
if __name__ == "__main__":
    # Entry point: crawl every list page starting from the base listing URL.
    base_url = "http://www.uumnt.com/meinv/list_"
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0",
        "Referer": "https://newimg.uumnt.com/",
    }
    allurl(base_url, request_headers)