此篇内容使用到了以下模块:
1.使用openpyxl实现excel的读写
2.使用pypinyin实现汉字转拼音
3.使用geohash获取经纬度编码
4.使用requests中的utils方法实现cookies转字典和字典转cookies
5.使用json对列表和字典以及字符串之间进行转换
通过此次的学习可以爬取到全国2000多个城市指定位置附近的商家信息,如果再进一步挖掘的话,就具备了很大的商业价值
import requests
from pypinyin import lazy_pinyin
import geohash
import json
import openpyxl
# Module-level HTTP session. A requests Session keeps cookies and pooled
# connections across the requests made through it. `requests.Session()` is
# used instead of the legacy `requests.session()` factory, which the requests
# documentation marks as kept only for backwards compatibility.
session = requests.Session()
# Write the processed restaurant information to an Excel file.
def write_info():
    """Fetch nearby restaurant records and save them to an Excel workbook.

    Calls ``get_nearby_restaurants_by_menu()``. Unless that call reports
    failure, a new workbook is created with one header row followed by one
    row per restaurant record, and saved as ``饿了么商家信息.xlsx`` (a path
    relative to the current working directory).

    Each record is read through the keys ``name``, ``order_num``,
    ``activities``, ``ship_fee`` and ``open_hours``.

    Returns:
        None. Nothing is written when the fetch result is ``False`` or
        ``None``.
    """
    rest_list = get_nearby_restaurants_by_menu()
    # The original code checked `!= False`, so the helper presumably signals
    # failure with False (verify against its definition). None is rejected
    # too, so a missing result is never iterated. An empty list is still
    # accepted and yields a header-only workbook.
    if rest_list is False or rest_list is None:
        return

    # Create a new workbook and use its default active worksheet.
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = "饿了么商家信息"

    # Column widths, keyed by column letter.
    column_widths = {'A': 40, 'B': 20, 'C': 60, 'D': 20, 'E': 30}
    for column, width in column_widths.items():
        sheet.column_dimensions[column].width = width

    # Header row; the order matches the per-restaurant rows appended below.
    headers = ['商家名称', '月销量', '商家活动', '配送费', '营业时间']
    sheet.append(headers)

    # One row per restaurant record.
    for rest in rest_list:
        sheet.append([
            rest['name'],
            rest['order_num'],
            rest['activities'],
            rest['ship_fee'],
            rest['open_hours'],
        ])

    # Persist the workbook to disk.
    wb.save("饿了么商家信息.xlsx")
# 信息处理,获取所需要的信息
def get_result(res):
restaurants_infos = []
for info in res:
infos = {}
name = info['name'] # 商家名称
recent_order_num_display = info['business_info']
order_num = json.loads(recent_order_num_display)['recent_order_num_display'] # 月销量
open_hours = info['opening_hours'][0] # 营业时间
latitude = info['latitude'] # 商家地理位置经纬度
longitude = info['longitude']
piecewise_agent_fee = info['piecewise_agent_fee']['tips'] # 配送费
activities = info['activities'][0]['tips'] # 活动
infos['name'] = name
infos['order_num'] = order_num
infos['open_hours'] = open_hours
infos['latitude'] = latitude
infos['longitude'] = longitude
infos['ship_fee'] = piecewise_agent_fee
infos['activities'] = activities
restaurants_infos.append(infos)
# print(name + " " + str(order_num) + " " + str(open_hours) + " " + str(latitude) + " " + str(
# longitude) + " " + str