from multiprocessing import Process,Queue
import requests
import re
from lxml.html import etree
import json
import time
from concurrent.futures import ProcessPoolExecutor
def down_load_page_data(req_url):
pattern = re.compile('.*?page=(\d+).*?city_id=(\d+).*?shop_id=(\d+)')
result = re.findall(pattern, req_url)[0]
DATE_SHOW_LOC = result[1]
DATE_SHOW_SHOP = result[2]
response = download_data(req_url,DATE_SHOW_LOC,DATE_SHOW_SHOP)
if response.status_code == 200:
# print(result)
current_page = int(result[0])
if current_page == 1:
data = {'page':current_page,'data':response.text}
with open(str(result[1])+'.html','w') as file:
file.write(response.text)
next_page = re.sub('page=\d+', 'page=' + str(current_page + 1), response.url)
print('正在获取第'+str(current_page+1)+'页',DATE_SHOW_LOC,DATE_SHOW_SHOP)
else:
data = {'page':current_page, 'data': response.text}
if current_page !=1:
if isinstance(json.loads(response.text),list):
next_page = re.sub('page=\d+','page='+str(current_page+1),response.url)
print('正在获取第' + str(current_page+1) + '页', DATE_SHOW_LOC, DATE_SHOW_SHOP)
else:
next_page = None
print(response.text)
print('已获取到' + str(current_page)