大家好，我是天空之城。今天给大家带来小福利。
from bs4 import BeautifulSoup
from gevent import monkey
monkey.patch_all()
import gevent,time,requests
from gevent.queue import Queue
import openpyxl
# Record the wall-clock start so the total run time can be printed at the
# end of the script.
start = time.time()

# Create the output workbook and name its active sheet.
wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = '食物热量表'

# Header row: food name, page URL, food calories.
sheet['A1'], sheet['B1'], sheet['C1'] = '食物名称', '网址', '食物热量'
# Every listing page to crawl: pages 1-10 of each of food groups 1-10,
# followed by pages 1-10 of the "view_menu" listing (110 URLs in total).
url_list = [
    'http://www.boohee.com/food/group/{}?page={}'.format(group, page)
    for group in range(1, 11)
    for page in range(1, 11)
]
url_list.extend(
    'http://www.boohee.com/food/view_menu?page={}'.format(page)
    for page in range(1, 11)
)
# HTTP headers passed to requests.get: a desktop Firefox User-Agent, a
# Referer pointing at the first food-group page, and a fixed Cookie value.
headers = dict([
    ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0'),
    ('Referer', 'http://www.boohee.com/food/group/1'),
    ('Cookie', 'Hm_lvt_7263598dfd4db0dc29539a51f116b23a=1588486427; Hm_lpvt_7263598dfd4db0dc29539a51f116b23a=1588487435'),
])
# Shared work queue: preload it with every URL so the worker greenlets can
# keep pulling from it until it is empty.
work = Queue()
for url in url_list:
    work.put_nowait(url)
def getdata():
    """Crawl queued listing pages and record every food entry found.

    Takes URLs from the module-level ``work`` queue until it is empty and
    downloads each page using the shared ``headers``.  For every ``<li>``
    inside ``<ul class="food-list">`` it prints and appends one row
    ``[name, absolute link, calorie text]`` to ``sheet``.

    Pages that fail to download or that contain no food list are skipped,
    as are list items missing the expected ``<h4>``, ``<a>`` or ``<p>`` tag.
    The workbook is saved to '食物热量对照表.xlsx' once, after this worker
    finds the queue empty.
    """
    while not work.empty():
        url = work.get_nowait()
        try:
            # requests applies no timeout by default, so a stalled
            # connection would otherwise block this worker indefinitely.
            res = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException as exc:
            # Best effort: report the failure and move on to the next URL
            # instead of letting the exception end this worker.
            print('request failed:', url, exc)
            continue

        soup = BeautifulSoup(res.text, 'html.parser')
        food_list = soup.find('ul', class_='food-list')
        if food_list is None:
            # find() returns None when the page has no food list; skip it
            # rather than fail on the attribute access below.
            continue

        for entry in food_list.find_all('li'):
            title = entry.find('h4')
            anchor = entry.find('a')
            calories = entry.find('p')
            if title is None or anchor is None or calories is None:
                # Not a regular food entry; nothing to record.
                continue
            name = title.text
            # The href is joined onto the site root to form the full URL.
            wzlink = 'http://www.boohee.com' + anchor['href']
            reliang = calories.text
            print(name, wzlink, reliang)
            sheet.append([name, wzlink, reliang])

    # Write the file once per worker instead of once per row.
    wb.save('食物热量对照表.xlsx')
# Start five worker greenlets that all pull from the shared queue, wait
# for every one of them to finish, then print the elapsed time in seconds.
task_list = [gevent.spawn(getdata) for _ in range(5)]
gevent.joinall(task_list)
end = time.time()
print(end - start)