import urllib.request
import re
import time
import os
# HTTP headers sent with every listing-page and image request made by the
# listing scraper below.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    'Referer': 'http://www.mm131.com/xiaohua/'
}

# Matches one thumbnail anchor on a listing page and captures the image
# ``src`` and ``alt`` attributes.  Example of the markup being matched:
#   <a target="_blank" href="http://www.mm131.com/xiaohua/634.html">
#   <img src="http://img1.mm131.me/pic/634/m634.jpg" alt="..." width="120"
#   height="160">...</a>
# Compiled once here instead of once per page.
THUMBNAIL_PATTERN = re.compile(
    r'<a target="_blank" href=".*?"><img src="(.*?)" alt="(.*?)" width=".*?" height=".*?">.*?</a>',
    re.S)

# Characters that must not appear in a file name: path separators, the
# characters Windows rejects, and ASCII control characters.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def page_url(page):
    """Return the listing URL for the 1-based page number *page*.

    Page 1 is the section index; later pages use the ``list_2_<n>.html``
    naming scheme.

    Raises:
        ValueError: if *page* is smaller than 1.
    """
    if page < 1:
        raise ValueError('page must be >= 1, got %r' % (page,))
    if page == 1:
        return 'http://www.mm131.com/xiaohua/'
    return 'http://www.mm131.com/xiaohua/list_2_{}.html'.format(page)


def find_thumbnails(html):
    """Return ``(image_url, title)`` pairs for every thumbnail in *html*.

    Surrounding whitespace is stripped from both values because the pattern
    is matched with ``re.S`` and can therefore capture line breaks that sit
    inside the attribute quotes.
    """
    return [(src.strip(), alt.strip())
            for src, alt in THUMBNAIL_PATTERN.findall(html)]


def image_filename(title, image_url):
    """Build a local file name from an image title and its URL.

    The name is the title followed by the extension of the URL's last path
    segment.  Characters that are unsafe in file names are replaced with
    ``_`` so a title can never escape the target directory or make
    ``open()`` fail.  An empty title falls back to the URL's own base name.
    """
    last_segment = image_url.rsplit('/', 1)[-1]
    if '.' in last_segment:
        stem, extension = last_segment.rsplit('.', 1)
    else:
        stem, extension = last_segment, ''
    name = (_UNSAFE_FILENAME_CHARS.sub('_', title).strip()
            or _UNSAFE_FILENAME_CHARS.sub('_', stem)
            or 'image')
    if extension:
        name += '.' + _UNSAFE_FILENAME_CHARS.sub('_', extension)
    return name


def fetch(url):
    """Return the raw response body of *url*, requested with ``headers``.

    The response is used as a context manager so the connection is always
    closed, even when reading fails.
    """
    request = urllib.request.Request(url=url, headers=headers)
    with urllib.request.urlopen(request) as response:
        return response.read()


def download_listing_pages(first_page=1, last_page=6, dirname='校花图', delay=2):
    """Download every thumbnail found on the listing pages into *dirname*.

    Args:
        first_page: first listing page to scrape (1-based, inclusive).
        last_page: last listing page to scrape (inclusive).
        dirname: directory the images are written to; created if missing.
        delay: seconds to sleep after each image and after each page.

    Network and decoding errors are not caught here; they propagate to the
    caller and stop the run.
    """
    os.makedirs(dirname, exist_ok=True)
    for page in range(first_page, last_page + 1):
        print('这是第%s页' % page)
        # Listing pages are decoded as GBK.
        content = fetch(page_url(page)).decode('gbk')
        for image, title in find_thumbnails(content):
            print(image)
            filepath = os.path.join(dirname, image_filename(title, image))
            # Read the whole body before opening the file so a failed
            # request never leaves an empty file behind.
            data = fetch(image)
            with open(filepath, 'wb') as fp:
                fp.write(data)
            # Pause between requests (presumably to avoid hammering the
            # server).
            time.sleep(delay)
        time.sleep(delay)


if __name__ == '__main__':
    download_listing_pages()
# Proxy pool example (original heading: 代理池案例):
import urllib.request
import os
def gallery_image_url(gallery_id, index):
    """Return the URL of image number *index* in gallery *gallery_id*."""
    return 'http://img1.mm131.me/pic/' + str(gallery_id) + '/' + str(index) + '.jpg'


def download_galleries(first_id=4200, last_id=4460, max_images=60, root='tupian'):
    """Download images of consecutive galleries into ``<root>/<gallery_id>/``.

    For every gallery id from *first_id* to *last_id* (inclusive) the images
    ``0.jpg`` .. ``<max_images - 1>.jpg`` are requested in order.  The first
    failure inside a gallery ends that gallery and moves on to the next one.

    Args:
        first_id: first gallery id to download (inclusive).
        last_id: last gallery id to download (inclusive).
        max_images: upper bound on images requested per gallery.
        root: directory under which one sub-directory per gallery is created.
    """
    # Built once; the same headers are sent with every image request.
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36',
        'Referer': 'https://www.sogou.com/link?url=DSOYnZeCC_o7btUgpK402wmc9YOcsOr4cOOT57O29F8'
    }
    for i in range(first_id, last_id + 1):
        gallery_dir = os.path.join(root, str(i))
        # makedirs also creates ``root`` itself and tolerates directories
        # left over from an earlier run, so the script can be restarted.
        os.makedirs(gallery_dir, exist_ok=True)
        # NOTE(review): the index starts at 0.  If the server numbers images
        # from 1, the very first request fails and the gallery is skipped
        # entirely -- confirm against the site before relying on this.
        for j in range(max_images):
            try:
                url = gallery_image_url(i, j)
                print(url)
                request = urllib.request.Request(url=url, headers=request_headers)
                with urllib.request.urlopen(request) as response:
                    data = response.read()
                # Write only after the body was read completely, so a failed
                # download never leaves a truncated file behind.
                with open(os.path.join(gallery_dir, str(j) + '.jpg'), 'wb') as fp:
                    fp.write(data)
            except Exception:
                # Deliberately broad best-effort handling: any failure is
                # treated as "no more images in this gallery".
                print('下载失败,下载下一条')
                break


if __name__ == '__main__':
    download_galleries()