目录
1, sax解析xml
DOM 会把整个 XML 文件读入内存,占用内存大;SAX 是流模式,边读边解析,占用内存小。下面的示例使用 xml.parsers.expat 以 SAX 风格(事件回调)解析。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''解析xml中的天气字段'''
from xml.parsers.expat import ParserCreate
# Sample weather-forecast response used as the demo input for parser_xml().
# It holds a single <forecast> for one city with four <cast> entries, one per
# day; each <cast> carries date/week plus day and night weather, temperature,
# wind and power fields. The literal starts with a newline before <response>.
xml = '''
<response>
<status>1</status>
<count>1</count>
<info>OK</info>
<infocode>10000</infocode>
<forecasts type="list">
<forecast>
<city>北京市</city>
<adcode>110000</adcode>
<province>北京</province>
<reporttime>2021-05-14 18:33:48</reporttime>
<casts type="list">
<cast>
<date>2021-05-14</date>
<week>5</week>
<dayweather>多云</dayweather>
<nightweather>小雨</nightweather>
<daytemp>26</daytemp>
<nighttemp>17</nighttemp>
<daywind>东南</daywind>
<nightwind>东南</nightwind>
<daypower>≤3</daypower>
<nightpower>≤3</nightpower>
</cast>
<cast>
<date>2021-05-15</date>
<week>6</week>
<dayweather>小雨</dayweather>
<nightweather>小雨</nightweather>
<daytemp>21</daytemp>
<nighttemp>14</nighttemp>
<daywind>北</daywind>
<nightwind>北</nightwind>
<daypower>4</daypower>
<nightpower>4</nightpower>
</cast>
<cast>
<date>2021-05-16</date>
<week>7</week>
<dayweather>多云</dayweather>
<nightweather>晴</nightweather>
<daytemp>25</daytemp>
<nighttemp>13</nighttemp>
<daywind>西</daywind>
<nightwind>西</nightwind>
<daypower>≤3</daypower>
<nightpower>≤3</nightpower>
</cast>
<cast>
<date>2021-05-17</date>
<week>1</week>
<dayweather>晴</dayweather>
<nightweather>晴</nightweather>
<daytemp>30</daytemp>
<nighttemp>15</nighttemp>
<daywind>西南</daywind>
<nightwind>西南</nightwind>
<daypower>≤3</daypower>
<nightpower>≤3</nightpower>
</cast>
</casts>
</forecast>
</forecasts>
</response>
'''
class MySaxHandler(object):
    """Collect per-day weather records from expat parser callbacks.

    Wire the three ``*_handler`` methods to an expat parser's
    ``StartElementHandler``, ``EndElementHandler`` and
    ``CharacterDataHandler``. After parsing, ``weather`` holds
    ``{'city': <city>, 'forecasts': [<one dict per complete day>, ...]}``.

    A day is emitted only once every name in ``weather_attrs_list`` has been
    seen for it; a day that is missing a field is dropped rather than being
    completed with values from the following day.
    """

    # Element that wraps the fields of a single day in the forecast XML.
    record_tag = 'cast'

    def __init__(self, city='Beijing'):
        """Create an empty collector.

        :param city: label stored under the ``'city'`` key of the result;
            it is taken from this argument, not read from the XML.
        """
        # City label
        self.city = city
        # Fields gathered so far for the day currently being parsed
        self.one_day_weather = {}
        # Completed days, in document order
        self.all_day_weather = []
        # Result object; shares the list above so it fills in as parsing runs
        self.weather = {'city': self.city, 'forecasts': self.all_day_weather}
        # Element names that make up one day's record
        self.weather_attrs_list = ['date', 'week', 'dayweather', 'nightweather',
                                   'daytemp', 'nighttemp', 'daywind', 'nightwind']
        # Last-seen parser state, defined up front so it is always readable
        self.start_element_name = None
        self.start_element_attrs = None
        self.end_element_name = None
        # Text content of the most recently closed element
        self.text = ''
        # Character-data chunks received since the last start/end tag
        self._text_parts = []

    def start_element_handler(self, name, attrs):
        """Handle an opening tag.

        :param name: element name
        :param attrs: element attributes
        :return: None
        """
        self.start_element_name = name
        self.start_element_attrs = attrs
        # Text seen before this tag belongs to the parent, not to this element.
        self._text_parts = []
        if name == self.record_tag:
            # New day: discard leftovers of an incomplete previous day so its
            # fields cannot be merged into this one.
            self.one_day_weather = {}

    def end_element_handler(self, name):
        """Handle a closing tag and store the element text if it is a weather field.

        :param name: element name
        :return: None
        """
        self.end_element_name = name
        # The parser may deliver one element's text in several pieces.
        self.text = ''.join(self._text_parts)
        self._text_parts = []
        if name not in self.weather_attrs_list:
            return
        self.one_day_weather[name] = self.text
        if len(self.one_day_weather) == len(self.weather_attrs_list):
            # Every expected field is present: the day is complete.
            self.all_day_weather.append(self.one_day_weather)
            self.one_day_weather = {}

    def chardata_handler(self, text):
        """Accumulate a chunk of character data for the current element.

        :param text: one chunk of text content
        :return: None
        """
        self._text_parts.append(text)
def parser_xml(xml):
    """Parse a weather-forecast XML document into a dict.

    :param xml: the complete XML document as a single string
    :return: the handler's ``weather`` dict, i.e.
        ``{'city': ..., 'forecasts': [<per-day dict>, ...]}``
    :raises xml.parsers.expat.ExpatError: if the document is not well-formed,
        including when it ends before all elements are closed
    """
    my_handler = MySaxHandler()
    parser = ParserCreate()
    # Ask expat to coalesce adjacent character data so an element's text is
    # delivered in one callback instead of being split (e.g. around entities).
    parser.buffer_text = True
    parser.StartElementHandler = my_handler.start_element_handler
    parser.EndElementHandler = my_handler.end_element_handler
    parser.CharacterDataHandler = my_handler.chardata_handler
    # The whole document is passed at once, so mark it as the final chunk;
    # otherwise a truncated document would be accepted without an error.
    parser.Parse(xml, True)
    return my_handler.weather
if __name__ == "__main__":
    # Script entry point: parse the embedded sample document and print the
    # resulting dict.
    print(parser_xml(xml))
----------------------------------------------------------------------------------
# 输出
"C:\Program Files\Python311\python.exe" D:\python_core_programming_learn\chapter_7_dict_set\notes.py
{'city': 'Beijing', 'forecasts': [{'date': '2021-05-14', 'week': '5', 'dayweather': '多云', 'nightweather': '小雨', 'daytemp': '26', 'nighttemp': '17', 'daywind': '东南', 'nightwind': '东南'}, {'date': '2021-05-15', 'week': '6', 'dayweather': '小雨', 'nightweather': '小雨', 'daytemp': '21', 'nighttemp': '14', 'daywind': '北', 'nightwind': '北'}, {'date': '2021-05-16', 'week': '7', 'dayweather': '多云', 'nightweather': '晴', 'daytemp': '25', 'nighttemp': '13', 'daywind': '西', 'nightwind': '西'}, {'date': '2021-05-17', 'week': '1', 'dayweather': '晴', 'nightweather': '晴', 'daytemp': '30', 'nighttemp': '15', 'daywind': '西南', 'nightwind': '西南'}]}