# 如题 ("as titled"): scrape food calorie data from boohee.com — stray text turned into a comment so the file parses.
# gevent's monkey patching must run BEFORE any other module is imported,
# otherwise requests/socket keep their blocking stdlib implementations.
from gevent import monkey
monkey.patch_all()  # BUG FIX: was `monkey.patch_all` — a bare attribute access that never patched anything

import csv
import time

import gevent
import requests
from bs4 import BeautifulSoup
from gevent.queue import Queue
start = time.time()

# Pretend to be a regular desktop browser so the site serves normal pages.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
}

# Category index pages: the generic menu plus food groups 1 through 10.
url_master = ['http://www.boohee.com/food/view_menu']
url_master.extend('http://www.boohee.com/food/group/' + str(g) for g in range(1, 11))

# Expand each category into its first 10 paginated result pages.
# url_list is the flat work list; url_dict maps category -> its pages.
url_list = []
url_dict = {}
for base in url_master:
    paged = [base + '?page=' + str(p) for p in range(1, 11)]
    url_dict[base] = paged
    url_list.extend(paged)
# Shared job queue: every page URL becomes one unit of work for the greenlets.
work = Queue()
for page_url in url_list:
    work.put_nowait(page_url)

# Scraped rows ([title, href, calorie]) accumulate here across all workers.
content = []
def spider():
    """Worker greenlet: drain the shared URL queue and scrape food entries.

    For each page, extracts every food item's title, detail-page URL and
    calorie text, appending [title, href, calorie] rows to the shared
    ``content`` list. Returns when the queue is exhausted.
    """
    # Local import keeps the fix self-contained in this function.
    from gevent.queue import Empty

    while True:
        # BUG FIX: the original `while not work.empty(): work.get_nowait()`
        # races between greenlets — another worker can take the last item
        # after the emptiness check, so get_nowait() raised Empty and
        # killed this greenlet. EAFP handles the race correctly.
        try:
            url = work.get_nowait()
        except Empty:
            return
        # Timeout so one stalled request cannot hang the worker forever.
        res = requests.get(url, headers=headers, timeout=10)
        print(url, res.status_code)
        soup = BeautifulSoup(res.text, 'html.parser')
        boxes = soup.find_all('div', class_='text-box pull-left')
        for box in boxes:
            link = box.find('a')
            title = link['title']
            href = 'http://www.boohee.com' + link['href']
            calorie = box.find('p').text
            content.append([title, href, calorie])
# Fan out five concurrent worker greenlets and wait for all to finish.
task_list = []
for _ in range(5):
    task_list.append(gevent.spawn(spider))
gevent.joinall(task_list)

# FIX: use a context manager so the file is closed even if writing raises
# (the original left the handle open on any exception before f.close()).
with open('123.txt', 'a', newline='', encoding='utf8') as f:
    csv.writer(f).writerows(content)

end = time.time()
print(end - start)
print('ok')