Python --解析xml

目录

1, sax解析xml


1, sax解析xml

DOM 会把整个 XML 文件读入内存，占用内存大；SAX 是流模式，边读边解析，占用内存小。

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

'''Parse the weather fields out of an XML document.'''

from xml.parsers.expat import ParserCreate

# Sample weather-forecast response used as the demo input when this file is
# run as a script: one <forecast> holding four <cast> (per-day) records.
xml = '''
    <response>
    <status>1</status>
    <count>1</count>
    <info>OK</info>
    <infocode>10000</infocode>
    <forecasts type="list">
        <forecast>
            <city>北京市</city>
            <adcode>110000</adcode>
            <province>北京</province>
            <reporttime>2021-05-14 18:33:48</reporttime>
            <casts type="list">
                <cast>
                    <date>2021-05-14</date>
                    <week>5</week>
                    <dayweather>多云</dayweather>
                    <nightweather>小雨</nightweather>
                    <daytemp>26</daytemp>
                    <nighttemp>17</nighttemp>
                    <daywind>东南</daywind>
                    <nightwind>东南</nightwind>
                    <daypower>≤3</daypower>
                    <nightpower>≤3</nightpower>
                </cast>
                <cast>
                    <date>2021-05-15</date>
                    <week>6</week>
                    <dayweather>小雨</dayweather>
                    <nightweather>小雨</nightweather>
                    <daytemp>21</daytemp>
                    <nighttemp>14</nighttemp>
                    <daywind>北</daywind>
                    <nightwind>北</nightwind>
                    <daypower>4</daypower>
                    <nightpower>4</nightpower>
                </cast>
                <cast>
                    <date>2021-05-16</date>
                    <week>7</week>
                    <dayweather>多云</dayweather>
                    <nightweather>晴</nightweather>
                    <daytemp>25</daytemp>
                    <nighttemp>13</nighttemp>
                    <daywind>西</daywind>
                    <nightwind>西</nightwind>
                    <daypower>≤3</daypower>
                    <nightpower>≤3</nightpower>
                </cast>
                <cast>
                    <date>2021-05-17</date>
                    <week>1</week>
                    <dayweather>晴</dayweather>
                    <nightweather>晴</nightweather>
                    <daytemp>30</daytemp>
                    <nighttemp>15</nighttemp>
                    <daywind>西南</daywind>
                    <nightwind>西南</nightwind>
                    <daypower>≤3</daypower>
                    <nightpower>≤3</nightpower>
                </cast>
            </casts>
        </forecast>
    </forecasts>
</response>
'''

class MySaxHandler(object):
    '''Expat callback holder that collects per-day weather records.

    The three ``*_handler`` methods are meant to be assigned to an expat
    parser's ``StartElementHandler``, ``EndElementHandler`` and
    ``CharacterDataHandler``. Each time every field named in
    ``weather_attrs_list`` has been seen, the collected record is appended
    to ``all_day_weather`` (also reachable as ``weather['forecasts']``).
    '''

    def __init__(self, city='Beijing'):
        '''
        :param city: label stored under the ``'city'`` key of ``weather``;
            it is not read from the parsed document.
        '''
        # City label reported in the result.
        self.city = city
        # Fields collected so far for the record currently being built.
        self.one_day_weather = {}
        # Completed per-day records, in document order.
        self.all_day_weather = []
        # Final result; shares the list object above, so it fills up
        # as parsing progresses.
        self.weather = {'city': self.city, 'forecasts': self.all_day_weather}
        # Element names that make up one per-day record.
        self.weather_attrs_list = ['date', 'week', 'dayweather', 'nightweather',
                                   'daytemp', 'nighttemp', 'daywind', 'nightwind']
        # Character data accumulated since the most recent start/end tag.
        self.text = ''
        # Most recently seen tag names/attributes (None until a tag is seen).
        self.start_element_name = None
        self.start_element_attrs = None
        self.end_element_name = None

    def start_element_handler(self, name, attrs):
        '''
        Handle an opening tag.

        :param name: tag name
        :param attrs: tag attributes
        :return: None
        '''
        self.start_element_name = name
        self.start_element_attrs = attrs
        # Discard inter-element whitespace so an empty element yields ''
        # instead of whatever text happened to precede it.
        self.text = ''

    def end_element_handler(self, name):
        '''
        Handle a closing tag; store its text if it is a weather field.

        :param name: tag name
        :return: None
        '''
        self.end_element_name = name

        if name in self.weather_attrs_list:
            self.one_day_weather[name] = self.text

        # A record is complete once every expected field has been collected.
        if len(self.one_day_weather) == len(self.weather_attrs_list):
            self.all_day_weather.append(self.one_day_weather)
            self.one_day_weather = {}

        # Text that follows this closing tag belongs to a different context.
        self.text = ''

    def chardata_handler(self, text):
        '''
        Handle character data found between tags.

        The parser may deliver the text of a single element in several
        calls, so chunks are appended rather than overwritten.

        :param text: a chunk of character data
        :return: None
        '''
        self.text += text

def parser_xml(xml):
    '''
    Parse a weather XML document and return the collected forecast.

    :param xml: the complete XML document
    :return: the handler's ``weather`` dict
    :raises xml.parsers.expat.ExpatError: if the document is malformed or
        incomplete
    '''
    my_handler = MySaxHandler()
    parser = ParserCreate()
    # Deliver each run of character data in one callback instead of
    # arbitrary chunks.
    parser.buffer_text = True
    parser.StartElementHandler = my_handler.start_element_handler
    parser.EndElementHandler = my_handler.end_element_handler
    parser.CharacterDataHandler = my_handler.chardata_handler
    # The whole document is passed at once, so this is the final call;
    # without the flag a truncated document is not reported as an error.
    parser.Parse(xml, True)

    return my_handler.weather


if __name__ == "__main__":
    # Parse the sample document and print the collected forecast.
    print(parser_xml(xml))

----------------------------------------------------------------------------------
# 输出
"C:\Program Files\Python311\python.exe" D:\python_core_programming_learn\chapter_7_dict_set\notes.py 
{'city': 'Beijing', 'forecasts': [{'date': '2021-05-14', 'week': '5', 'dayweather': '多云', 'nightweather': '小雨', 'daytemp': '26', 'nighttemp': '17', 'daywind': '东南', 'nightwind': '东南'}, {'date': '2021-05-15', 'week': '6', 'dayweather': '小雨', 'nightweather': '小雨', 'daytemp': '21', 'nighttemp': '14', 'daywind': '北', 'nightwind': '北'}, {'date': '2021-05-16', 'week': '7', 'dayweather': '多云', 'nightweather': '晴', 'daytemp': '25', 'nighttemp': '13', 'daywind': '西', 'nightwind': '西'}, {'date': '2021-05-17', 'week': '1', 'dayweather': '晴', 'nightweather': '晴', 'daytemp': '30', 'nighttemp': '15', 'daywind': '西南', 'nightwind': '西南'}]}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值