import multiprocessing
import os
import re
import time
import gevent
import requests
# Shared module-level state: filled by get_data(), consumed by the download
# workers. The two lists are index-aligned (pics_url[i] belongs to names[i]).
pics_url = []
names = []
def get_data():
    """Scrape dish-list pages 1-15 of one shop and fill the module-level
    ``pics_url`` / ``names`` lists with index-aligned (image URL, dish name)
    pairs extracted by regex.

    Side effects: appends to the global lists and prints the total count and
    the collected names when done. Network errors from requests.get propagate.
    """
    global pics_url
    global names
    # Hoisted out of the loop: the request header never changes between pages.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }
    for page in range(1, 16):
        # 1. Fetch the dish-list page for this page number.
        response = requests.get(
            'http://www.dianping.com/shop/19477398/dishlist/p' + str(page),
            headers=headers)
        html = response.content.decode('utf-8')
        # 2. Extract every (image URL, dish name) pair. The greedy (.*) groups
        #    rely on the src/alt attributes sharing one line in the markup.
        res = re.findall(r'<div\sclass="shop-food-img">[\s]*<img\ssrc=(.*)\salt="(.*)">', html)
        # Tuple-unpack directly instead of the original list(re_infor)[0]/[1].
        for pic_url, name in res:
            pics_url.append(pic_url)
            names.append(name)
    print(len(pics_url))
    print(names)
# 开始协程下载
def down(pics_url, names):
    """Download one image from URL *pics_url* and save it as ./pics/<names>.jpg.

    Any failure (network OR file I/O) is printed instead of raised, so one bad
    image cannot abort the whole batch. In the original code requests.get sat
    outside the try block, so a network error escaped the handler and killed
    the calling greenlet — it is now covered too.
    """
    try:
        response = requests.get(pics_url)
        with open('./pics/' + names + '.jpg', 'wb') as f:
            f.write(response.content)
    except Exception as e:
        # Best-effort by design: report and move on to the next image.
        print(e)
# 开启进程监听下载进度
def thread_down(pics_url):
    """Progress monitor: poll ./pics every 0.5 s and print the percentage of
    the len(pics_url) expected images that have appeared on disk; return when
    all are present. Intended to run in a separate process.

    NOTE(review): loops forever if some downloads fail permanently — the file
    count then never reaches the target; confirm whether that is acceptable.
    """
    print('*' * 100)
    total = len(pics_url)
    # Guard clause: nothing to monitor — no need to sleep first as before.
    if total == 0:
        print('图片个数为0')
        return
    while True:
        time.sleep(0.5)
        # Count the directory once per tick (the original listed it twice).
        done = len(os.listdir('./pics'))
        print('\r下载了%0.2f%%' % (done / total * 100), end='')
        if done == total:
            break
def download_image():
    """Spawn one gevent greenlet per collected image and block until every
    download attempt has finished.

    NOTE(review): no gevent.monkey.patch_all() is visible in this file, so
    requests' sockets may block and downloads could run mostly sequentially —
    confirm against the rest of the project.
    """
    jobs = [gevent.spawn(down, url, name) for url, name in zip(pics_url, names)]
    gevent.joinall(jobs)
def main():
    """Orchestrate the scrape: collect URL/name lists, ensure ./pics exists,
    start the progress-monitor process, run the gevent downloads, then wait
    for the monitor to finish."""
    # Dropped the original's useless `global pics_url` — main never rebinds it.
    get_data()
    if not os.path.exists('./pics'):
        os.mkdir('./pics')
    # The monitor runs in its own process so its sleep/print loop cannot
    # interfere with gevent's event loop in this process.
    p1 = multiprocessing.Process(target=thread_down, args=(pics_url,))
    p1.start()
    download_image()
    # Fix: the original never joined p1, so main could exit while the monitor
    # was still printing progress. Wait for it to observe completion.
    p1.join()
# Entry-point guard: run the scraper only when executed as a script,
# not when imported (also required for multiprocessing on Windows).
if __name__ == '__main__':
    main()