# -*- coding: utf-8 -*-
"""
Created on Mon Jul 8 15:57:13 2019
@author: HY
"""
import requests
from bs4 import BeautifulSoup
import re
import random
import datetime
from urllib import error
import urllib
import lxml
import os
def mm(url, total_page=12):
    """Collect the ``<img src="...">`` values from each page of a listing.

    Pages ``index_1.html`` through ``index_<total_page>.html`` are requested
    one after another and scanned with a regular expression.

    Args:
        url: URL of any page of the listing, ending in ``index_<n>.html``.
            Only the part before ``index_<n>.html`` is used; the page number
            in it is ignored. If the URL does not end that way, the
            previously hard-coded listing is used, so existing callers keep
            getting the same pages as before.
        total_page: Number of pages to fetch, starting with page 1.

    Returns:
        A list with one entry per fetched page, in page order. Each entry is
        the list of ``src`` strings matched on that page.
    """
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36'}

    # Derive the listing prefix from the argument instead of ignoring it.
    page_suffix = re.search(r'index_\d+\.html$', url)
    if page_suffix:
        prefix = url[:page_suffix.start()]
    else:
        prefix = "https://www.ivsky.com/tupian/juzi_t22164/"

    # Compiled once; the pattern is the same for every page.
    img_src = re.compile(r'<img src="(.*?)"')

    src_list = []
    for page in range(1, total_page + 1):
        page_url = prefix + "index_" + str(page) + ".html"
        # A timeout keeps one unresponsive request from hanging the run.
        req = requests.get(url=page_url, headers=header, timeout=10)
        src_list.append(img_src.findall(req.text))
    return src_list
def download(url, sl):
    """Download *url* and save it as ``img/<sl>.jpg``.

    Args:
        url: Address of the file to fetch.
        sl: Base name (without extension) of the file to write.

    The ``img`` directory is created relative to the current working
    directory if it does not exist yet.
    """
    # Imported explicitly: a bare ``import urllib`` does not guarantee that
    # the ``request`` submodule is loaded.
    import urllib.request

    # os.path.join avoids the invalid ``\%`` escape and the Windows-only
    # separator of the old literal; on Windows the resulting path is the same.
    path = os.path.join('img', '%s.jpg' % sl)  # destination path
    os.makedirs(os.path.dirname(path), exist_ok=True)
    urllib.request.urlretrieve(url, path)  # download the photo
if __name__ == '__main__':
    # Scrape the image sources of the listing, then download each image once.
    url = "https://www.ivsky.com/tupian/juzi_t22164/index_0.html"
    src_list = mm(url)
    # src_list holds one list of sources per page. It is walked exactly once;
    # the former extra outer loop re-downloaded every image once per page.
    for page_no, page_srcs in enumerate(src_list, start=1):
        for img_no, src in enumerate(page_srcs):
            # Page and image index, separated by "_", give every image its
            # own file name. The old name was built from the page's image
            # count, so pages with the same count overwrote each other.
            sl = '%d_%d' % (page_no, img_no)
            # Sources are prefixed with 'http:' as before; a source that is
            # already an absolute URL is used unchanged.
            if src.startswith(('http://', 'https://')):
                img_url = src
            else:
                img_url = 'http:' + src
            download(img_url, sl)
    print('All Done!')