本文正在参加「Python主题月」,详情查看https://juejin.cn/post/6979532761954533390/
最近,老婆老是问我晚上吃啥饭,关键我也很蒙圈啊,除了平时吃的那些,还有啥?忽然有一天发现了下厨房这个神器,里面有好多菜谱,而且各种分类都有,不论你是爱吃肉菜、素菜还是凉菜,只要你想吃,下厨房里基本都有。虽然下厨房的 app 很方便,但是老婆问我吃啥的时候,我一般都在公司加班,不方便打开下厨房的网站或者 app。那么怎么办呢?我把他们的菜谱搞下来一部分不就可以了。整理成 Excel 表格的形式,老婆问我晚上吃啥的时候,我打开 Excel,一看就知道了,哈哈哈。
```python
import requests
from pyquery import PyQuery as pq
import time
import xlsxwriter as xw
# HTTP request headers sent with every request; they imitate a desktop
# Firefox session browsing www.xiachufang.com.
header = {
    "Host": "www.xiachufang.com",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0",
    # "*/*" is the wildcard media range; the bare "/" that was here is not a
    # valid media range (the asterisks were lost when the post was rendered).
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
    # "br" is deliberately not advertised: requests only decodes Brotli when an
    # optional Brotli package is installed, otherwise res.text would be
    # undecoded bytes and every selector would match nothing.
    "Accept-Encoding": "gzip, deflate",
    "Referer": "https://www.xiachufang.com/category/",
    "Upgrade-Insecure-Requests": "1",
}

# Worksheet row counter for recipe rows (row 1 holds the column titles);
# getpage updates it through `global`.
rowi = 2
# Get the ingredients of a recipe
def get_meterial(div):
    """Return the ingredient rows of a recipe as one ';'-terminated string.

    Args:
        div: PyQuery-style selection that contains the ingredients table.

    Returns:
        For every ``tr`` inside the first-level ``table`` lookup, the text of
        its ``td`` cells followed by ';', concatenated in iteration order.
        An empty string when there are no rows.
    """
    rows = div.find('table').find("tr")
    # str.join builds the result in one pass instead of re-copying the
    # accumulated string on every row.
    return "".join(row.find("td").text() + ';' for row in rows.items())
# Get the cooking steps of a recipe
def get_steps(div):
    """Return the text of every ``.container`` element under ``div``.

    Args:
        div: PyQuery-style selection holding the steps section; it is called
            with the ``.container`` selector.

    Returns:
        The texts of the matched elements concatenated with no separator,
        in iteration order. An empty string when nothing matches.
    """
    containers = div('.container')
    # str.join avoids rebuilding the accumulated string for each step.
    return "".join(step.text() for step in containers.items())
# Write to Excel
def xwtoExcel(worksheet1, data, fileName, rowi):
    """Store recipe records in the worksheet using the xlsxwriter API.

    Args:
        worksheet1: Worksheet object exposing ``write_row(cell, values)``.
        data: Sequence of dicts with the keys ``name``, ``author``, ``desc``,
            ``steps``, ``ings`` and ``url``.
        fileName: Not used here; kept so existing callers keep working.
        rowi: 1-based worksheet row that receives the first record.

    Raises:
        KeyError: If a record lacks one of the expected keys.
    """
    columns = ("name", "author", "desc", "steps", "ings", "url")
    for offset, record in enumerate(data):
        insertData = [record[key] for key in columns]
        # Each record gets its own row; writing all of them to `rowi` would
        # leave only the last record in the sheet.
        # The xlsxwriter method is write_row (writerow does not exist).
        worksheet1.write_row('A' + str(rowi + offset), insertData)
# Fetch a recipe sub-page
def getpage(worksheet1, fileName, name, url):
    """Fetch one recipe page and write it to the worksheet as the next row.

    Args:
        worksheet1: Worksheet object forwarded to ``xwtoExcel``.
        fileName: Workbook file name; only forwarded to ``xwtoExcel``.
        name: Recipe title taken from the category listing.
        url: Absolute URL of the recipe page.

    Side effects:
        Performs an HTTP GET, writes one row at the module-level ``rowi``,
        prints that row number with the record, then advances ``rowi``.
    """
    global rowi
    res = requests.get(url, headers=header)
    panel = pq(res.text)('.main-panel')
    record = {
        "name": name,
        "author": panel('.author').text(),
        "desc": panel('.desc').text(),
        # The helpers are defined as get_steps / get_meterial; the
        # underscore-less names used before were undefined.
        "steps": get_steps(panel('.steps')),
        "ings": get_meterial(panel('.ings')),
        "url": url,
    }
    testData = [record]
    # Write first, then advance: incrementing before the write left the row
    # that `rowi` initially points at permanently empty.
    xwtoExcel(worksheet1, testData, fileName, rowi)
    print(rowi, testData)
    rowi = rowi + 1
def getmenu():
    """Crawl the configured category listings and save recipes to caipu.xlsx.

    For each category id, the first ``count`` listing pages are fetched; every
    recipe link found is passed to ``getpage``, which writes one row.

    Side effects:
        Performs HTTP requests, sleeps one second after each recipe, and
        creates/overwrites ``caipu.xlsx`` in the working directory.
    """
    count = 30  # number of listing pages fetched per category
    fileName = 'caipu.xlsx'
    title = ['标题', '作者', '描述', '步骤', '用料', "url"]  # header row
    namelist = ["40076", "52354", "51848", "52351", "20137"]  # ids used in the category URL

    # The context manager closes (and thereby saves) the workbook even when a
    # request or parse step raises part-way through the crawl.
    with xw.Workbook(fileName) as workbook:
        worksheet1 = workbook.add_worksheet("sheet1")
        worksheet1.activate()
        worksheet1.write_row('A1', title)  # header goes in row 1
        for nam in namelist:
            for page in range(1, count + 1):
                url = "https://www.xiachufang.com/category/{}/?page={}".format(nam, page)
                res = requests.get(url, headers=header)
                html = pq(res.text)
                normallist = html('.normal-recipe-list').find("ul").find("li")
                for lists in normallist.items():
                    ainfo = lists('.info')('.name').find('a')
                    href = ainfo.attr("href")
                    if not href:
                        # List item without a recipe link: nothing to fetch,
                        # and concatenating None below would raise TypeError.
                        continue
                    getpage(worksheet1, fileName, ainfo.text(), "https://www.xiachufang.com" + href)
                    time.sleep(1)  # pause between recipe requests
if name == "main": get_menu() ```