import urllib.request
import os
# Not implemented yet.
def proxy_open():
    """Placeholder for installing a proxy; currently does nothing.

    Planned steps (none of them is implemented yet):
      1. Choose a proxy.
      2. Build the proxy.
      3. Install the proxy.

    Returns:
        None.
    """
    return None
def save_pic(url, filename):
    """Download the resource at *url* and write its bytes to *filename*.

    Args:
        url: Address of the picture to download.
        filename: Path of the file to create (opened in binary write mode).

    Returns:
        None. Nothing is written when the request fails or the response
        body is empty; a message is printed instead.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36')
    try:
        proxy_open()
        # The context manager closes the connection even if read() raises.
        with urllib.request.urlopen(req) as response:
            pic = response.read()
    except urllib.error.URLError as err:
        # URLError is the base class of HTTPError, so this covers both HTTP
        # status errors and connection-level failures (DNS, refused, ...).
        print('download failed: {} ({})'.format(url, err))
        return
    if not pic:
        print('图片不存在')
        return
    with open(filename, 'wb') as f:
        f.write(pic)
def find_pic(html):
    """Collect the ``src="..."`` values inside the first post-content block.

    The search window starts at the first ``<div class="postContent">`` and
    ends at the first ``</p>`` that follows it.

    Args:
        html: Page markup as a string.

    Returns:
        A list of the attribute values found, in document order. The list is
        empty when the marker div or the closing ``</p>`` is missing.
    """
    attr = 'src="'
    start = html.find('<div class="postContent">')
    if start == -1:
        return []
    end = html.find('</p>', start)
    if end == -1:
        return []

    pic_list = []
    pos = start
    while True:
        begin = html.find(attr, pos)
        # str.find returns -1 when nothing is left; without this check the
        # cursor would restart near the beginning of the page and loop forever.
        if begin == -1 or begin > end:
            break
        begin += len(attr)
        close = html.find('"', begin)
        if close == -1:
            # Unterminated attribute value: nothing reliable to extract.
            break
        pic_list.append(html[begin:close])
        pos = close
    return pic_list
def get_pic(url, count, folder='OOXX'):
    """Fetch one page and save every picture that find_pic() reports on it.

    Args:
        url: Address of the page to scan.
        count: Page number; used as the first part of each saved file name.
        folder: Directory that receives the pictures. Defaults to 'OOXX',
            the location that used to be hard-coded here.

    Returns:
        None. Pages that cannot be fetched are reported and skipped.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36')
    try:
        # The context manager closes the connection once the body is read.
        with urllib.request.urlopen(req) as response:
            raw = response.read()
    except urllib.error.URLError as err:
        # One missing or unreachable page should not abort the whole crawl.
        print('page unavailable: {} ({})'.format(url, err))
        return
    # Stray undecodable bytes are replaced instead of raising; the picture
    # URLs being extracted are plain ASCII and are unaffected.
    html = raw.decode('gbk', errors='replace')

    # Locate the picture links.
    pic_list = find_pic(html)
    if not pic_list:
        return

    # Save the pictures as <count>_<index>.jpg inside *folder*.
    os.makedirs(folder, exist_ok=True)
    for index, each in enumerate(pic_list):
        print(each)
        filename = os.path.join(folder, '{}_{}.jpg'.format(count, index))
        save_pic(each, filename)


def download_mm(folder='OOXX', pages=100):
    """Crawl the numbered gallery pages and store their pictures in *folder*.

    Args:
        folder: Directory that receives the pictures; created if missing.
        pages: Exclusive upper bound of the page counter. Pages
            1 .. pages - 1 are visited, matching the original loop.

    The working directory is left unchanged. The previous version changed
    into *folder* only when it had just been created, so the relative save
    path pointed at a different place on the first run than on later runs.
    """
    os.makedirs(folder, exist_ok=True)
    url_module = 'http://www.meizitu.com/a/%d.html'
    for count in range(1, pages):
        url = url_module % count
        get_pic(url, count, folder)
        print(url)
# Start the crawl with the default folder and page range, but only when the
# file is executed as a script (not when it is imported).
if __name__ == '__main__':
    download_mm()
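# Python 3.4 抓图 (title of the source article: "Python 3.4 image scraping")
# 最新推荐文章于 2023-09-14 16:51:09 发布 (web-page footer: "latest recommended article published 2023-09-14 16:51:09")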