前言
爬取数据见上一篇博客:《Python3 爬取OpenStreetMap平台的城市道路交通网数据》
代码
"""
osm 是xml文件 因此 利用 sax解析xml
将读取的结果保存至 csv 文件
"""
import xml.sax
import openpyxl
import re
import os
"""
* func checkIsExist
* desc 检查文件是否存在,若是存在则删除
* para path String
* retu
"""
def checkIsExist(path:str):
if os.path.exists(path):
#删除它
os.remove(path)
def roadDataToXlsx(data: dict, path: str):
    """Write road records to an xlsx workbook saved at ``path``.

    Args:
        data: Mapping whose values are sequences holding, in order,
            id, name, lanes, highway, oneway and city. The keys are ignored.
        path: Destination of the workbook. An existing file there is replaced.

    Raises:
        IndexError: If a record has fewer than six fields.
    """
    headers = ("id", "name", "lanes", "highway", "oneway", "city")

    wbk = openpyxl.Workbook()
    sd_sheet = wbk.active

    # Row 1 holds the column titles.
    for column, title in enumerate(headers, start=1):
        sd_sheet.cell(1, column, title)

    # One row per record, starting right below the header row.
    for row, record in enumerate(data.values(), start=2):
        for column in range(1, len(headers) + 1):
            sd_sheet.cell(row, column, record[column - 1])

    # Saving fails if the target directory is missing, so create it first.
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    checkIsExist(path)  # drop the previous workbook before writing the new one
    wbk.save(path)
# SAX handler that collects the named road <way> elements of an OSM document.
class WayHandler(xml.sax.ContentHandler):
    """Collect road records from the ``<way>`` elements of an OSM XML stream.

    For every ``<way>`` the handler remembers the values of a few child
    ``<tag k=... v=...>`` elements. When the way closes, it is stored in
    ``road_map`` (keyed by way id) if it passes the road filter; see
    ``_isNamedRoad``. Each stored record is the list
    ``[id, name, lanes, highway, oneway, city]``.
    """

    # Maps an OSM tag key to the handler attribute that stores its value.
    _TAG_FIELDS = {
        "name": "tag_name",
        "lanes": "tag_lanes",
        "highway": "tag_highway",
        "oneway": "tag_oneway",
        "waterway": "tag_waterway",
        "power": "tag_power",
        "railway": "tag_railway",
    }
    # Names must end in one of the road/avenue/street suffixes ...
    _ROAD_NAME = re.compile(r".*(路|道|街)$")
    # ... but tunnels (which also end in "道") are excluded.
    _TUNNEL_NAME = re.compile(r".*隧道$")
    _EXCLUDED_POWER = ("plant", "line", "substation")
    _EXCLUDED_RAILWAY = ("subway", "rail")

    def __init__(self, city_name):
        """Create a handler whose records are labelled with ``city_name``."""
        super().__init__()
        self.rootName = ""  # name of the enclosing element while inside a <way>
        self.city = city_name
        self.road_map = {}  # way id -> [id, name, lanes, highway, oneway, city]
        self.clear()

    def startElement(self, tag, attributes):
        """Record the way id, or the value of a known tag inside a way."""
        if tag == "way":
            self.rootName = tag
            self.way_id = attributes.get("id", "")
        elif tag == "tag" and self.rootName == "way":
            # Only tags that are children of a <way> are of interest.
            field = self._TAG_FIELDS.get(attributes.get("k", ""))
            if field is not None:
                setattr(self, field, attributes.get("v", ""))

    def endElement(self, tag):
        """Store the finished way if it is a named road, then reset state."""
        if tag != "way":
            return
        self.rootName = ""  # we have left the <way> element
        if self._isNamedRoad():
            self.road_map[self.way_id] = [
                self.way_id,
                self.tag_name,
                self.tag_lanes,
                self.tag_highway,
                self.tag_oneway,
                self.city,
            ]
        self.clear()

    def characters(self, content):
        """Ignore character data; only attributes are used."""
        pass

    def clear(self):
        """Reset all per-way fields to empty strings."""
        self.way_id = ""
        for field in self._TAG_FIELDS.values():
            setattr(self, field, "")

    def _isNamedRoad(self):
        """Return True when the current way should be kept as a road.

        A way is kept when its name ends in 路/道/街 but not in 隧道, it has a
        non-empty ``highway`` tag, and it is not tagged as a power plant, line
        or substation, nor as a subway or rail line.
        """
        name = self.tag_name
        if not self._ROAD_NAME.match(name) or self._TUNNEL_NAME.match(name):
            return False
        if self.tag_highway == "":
            return False
        if self.tag_power in self._EXCLUDED_POWER:
            return False
        if self.tag_railway in self._EXCLUDED_RAILWAY:
            return False
        return True

    @staticmethod
    def _laneCount(lanes):
        """Return ``lanes`` as an int, or None if it is not a plain number."""
        if lanes and lanes.isdecimal():
            return int(lanes)
        return None

    def _mergeByName(self):
        """Collapse ``road_map`` to one record per road name.

        The first record seen for a name is kept. Its lanes field is replaced
        by a later record's lanes when the kept value is not a plain number
        (including empty) and the later one is, or when the later count is
        numerically smaller. Records are copied, so ``road_map`` is untouched.

        Returns:
            dict mapping road name to ``[id, name, lanes, highway, oneway, city]``.
        """
        merged = {}
        for record in self.road_map.values():
            name = record[1]
            kept = merged.get(name)
            if kept is None:
                # Copy so later lane updates do not leak into self.road_map.
                merged[name] = list(record)
                continue
            candidate = self._laneCount(record[2])
            if candidate is None:
                continue  # nothing usable to contribute
            current = self._laneCount(kept[2])
            # Compare as integers: as strings "10" would sort before "2".
            if current is None or candidate < current:
                kept[2] = record[2]
        return merged

    def saveToXlsx(self):
        """Write the per-name merged records to the city's xlsx workbook."""
        roadDataToXlsx(
            self._mergeByName(),
            "./static/road/" + self.city + "道路信息表.xlsx",
        )
# Entry point: convert a city's OSM file into an xlsx workbook.
def josmToXlsx(city_name) -> int:
    """Parse ``./static/osm/<city_name>.osm`` and export its roads to xlsx.

    Args:
        city_name: Name of the city; used for the input file name and passed
            on to the handler that labels and saves the records.

    Returns:
        700 if the OSM file does not exist, 200 after a successful export.
    """
    path = "./static/osm/" + city_name + ".osm"  # location of the OSM file
    # isfile rather than exists: a directory of that name cannot be parsed.
    if not os.path.isfile(path):
        return 700

    # Build a SAX reader with namespace processing switched off.
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 0)

    # Feed the document through the road-collecting handler.
    nodeHandler = WayHandler(city_name)
    parser.setContentHandler(nodeHandler)
    parser.parse(path)

    nodeHandler.saveToXlsx()
    return 200