XML解析

原XML文件含有31个省,由于数据太多,只放一个象征性的广东省,其他省结构一致。

 <province name="广东省" postcode="440000" >
    <city name="广州市" postcode="440100" >
        <area name="荔湾区" postcode="440103" />
        <area name="越秀区" postcode="440104" />
        <area name="海珠区" postcode="440105" />
        <area name="天河区" postcode="440106" />
        <area name="白云区" postcode="440111" />
        <area name="黄埔区" postcode="440112" />
        <area name="番禺区" postcode="440113" />
        <area name="花都区" postcode="440114" />
        <area name="南沙区" postcode="440115" />
        <area name="萝岗区" postcode="440116" />
        <area name="增城市" postcode="440183" />
        <area name="从化市" postcode="440184" />
    </city>
    <city name="韶关市" postcode="440200" >
        <area name="武江区" postcode="440203" />
        <area name="浈江区" postcode="440204" />
        <area name="曲江区" postcode="440205" />
        <area name="始兴县" postcode="440222" />
        <area name="仁化县" postcode="440224" />
        <area name="翁源县" postcode="440229" />
        <area name="乳源瑶族自治县" postcode="440232" />
        <area name="新丰县" postcode="440233" />
        <area name="乐昌市" postcode="440281" />
        <area name="南雄市" postcode="440282" />
    </city>
    <city name="深圳市" postcode="440300" >
        <area name="罗湖区" postcode="440303" />
        <area name="福田区" postcode="440304" />
        <area name="南山区" postcode="440305" />
        <area name="宝安区" postcode="440306" />
        <area name="龙岗区" postcode="440307" />
        <area name="盐田区" postcode="440308" />
    </city>
    <city name="珠海市" postcode="440400" >
        <area name="香洲区" postcode="440402" />
        <area name="斗门区" postcode="440403" />
        <area name="金湾区" postcode="440404" />
    </city>
    <city name="汕头市" postcode="440500" >
        <area name="龙湖区" postcode="440507" />
        <area name="金平区" postcode="440511" />
        <area name="濠江区" postcode="440512" />
        <area name="潮阳区" postcode="440513" />
        <area name="潮南区" postcode="440514" />
        <area name="澄海区" postcode="440515" />
        <area name="南澳县" postcode="440523" />
    </city>
    <city name="佛山市" postcode="440600" >
        <area name="禅城区" postcode="440604" />
        <area name="南海区" postcode="440605" />
        <area name="顺德区" postcode="440606" />
        <area name="三水区" postcode="440607" />
        <area name="高明区" postcode="440608" />
    </city>
    <city name="江门市" postcode="440700" >
        <area name="蓬江区" postcode="440703" />
        <area name="江海区" postcode="440704" />
        <area name="新会区" postcode="440705" />
        <area name="台山市" postcode="440781" />
        <area name="开平市" postcode="440783" />
        <area name="鹤山市" postcode="440784" />
        <area name="恩平市" postcode="440785" />
    </city>
    <city name="湛江市" postcode="440800" >
        <area name="赤坎区" postcode="440802" />
        <area name="霞山区" postcode="440803" />
        <area name="坡头区" postcode="440804" />
        <area name="麻章区" postcode="440811" />
        <area name="遂溪县" postcode="440823" />
        <area name="徐闻县" postcode="440825" />
        <area name="廉江市" postcode="440881" />
        <area name="雷州市" postcode="440882" />
        <area name="吴川市" postcode="440883" />
    </city>
    <city name="茂名市" postcode="440900" >
        <area name="茂南区" postcode="440902" />
        <area name="茂港区" postcode="440903" />
        <area name="电白县" postcode="440923" />
        <area name="高州市" postcode="440981" />
        <area name="化州市" postcode="440982" />
        <area name="信宜市" postcode="440983" />
    </city>
    <city name="肇庆市" postcode="441200" >
        <area name="端州区" postcode="441202" />
        <area name="鼎湖区" postcode="441203" />
        <area name="广宁县" postcode="441223" />
        <area name="怀集县" postcode="441224" />
        <area name="封开县" postcode="441225" />
        <area name="德庆县" postcode="441226" />
        <area name="高要市" postcode="441283" />
        <area name="四会市" postcode="441284" />
    </city>
    <city name="惠州市" postcode="441300" >
        <area name="惠城区" postcode="441302" />
        <area name="惠阳区" postcode="441303" />
        <area name="博罗县" postcode="441322" />
        <area name="惠东县" postcode="441323" />
        <area name="龙门县" postcode="441324" />
    </city>
    <city name="梅州市" postcode="441400" >
        <area name="梅江区" postcode="441402" />
        <area name="梅县" postcode="441421" />
        <area name="大埔县" postcode="441422" />
        <area name="丰顺县" postcode="441423" />
        <area name="五华县" postcode="441424" />
        <area name="平远县" postcode="441426" />
        <area name="蕉岭县" postcode="441427" />
        <area name="兴宁市" postcode="441481" />
    </city>
    <city name="汕尾市" postcode="441500" >
        <area name="城区" postcode="441502" />
        <area name="海丰县" postcode="441521" />
        <area name="陆河县" postcode="441523" />
        <area name="陆丰市" postcode="441581" />
    </city>
    <city name="河源市" postcode="441600" >
        <area name="源城区" postcode="441602" />
        <area name="紫金县" postcode="441621" />
        <area name="龙川县" postcode="441622" />
        <area name="连平县" postcode="441623" />
        <area name="和平县" postcode="441624" />
        <area name="东源县" postcode="441625" />
    </city>
    <city name="阳江市" postcode="441700" >
        <area name="江城区" postcode="441702" />
        <area name="阳西县" postcode="441721" />
        <area name="阳东县" postcode="441723" />
        <area name="阳春市" postcode="441781" />
    </city>
    <city name="清远市" postcode="441800" >
        <area name="清城区" postcode="441802" />
        <area name="佛冈县" postcode="441821" />
        <area name="阳山县" postcode="441823" />
        <area name="连山壮族瑶族自治县" postcode="441825" />
        <area name="连南瑶族自治县" postcode="441826" />
        <area name="清新县" postcode="441827" />
        <area name="英德市" postcode="441881" />
        <area name="连州市" postcode="441882" />
    </city>
    <city name="东莞市" postcode="441900" >
        <area name="市辖区" postcode="441901" />
    </city>
    <city name="中山市" postcode="442000" >
        <area name="市辖区" postcode="442001" />
    </city>
    <city name="潮州市" postcode="445100" >
        <area name="湘桥区" postcode="445102" />
        <area name="潮安县" postcode="445121" />
        <area name="饶平县" postcode="445122" />
    </city>
    <city name="揭阳市" postcode="445200" >
        <area name="榕城区" postcode="445202" />
        <area name="揭东县" postcode="445221" />
        <area name="揭西县" postcode="445222" />
        <area name="惠来县" postcode="445224" />
        <area name="普宁市" postcode="445281" />
    </city>
    <city name="云浮市" postcode="445300" >
        <area name="云城区" postcode="445302" />
        <area name="新兴县" postcode="445321" />
        <area name="郁南县" postcode="445322" />
        <area name="云安县" postcode="445323" />
        <area name="罗定市" postcode="445381" />
    </city>
  </province>


根据原XML文件的结构构建辅助类。
原结构的最小单元是县区,市由若干县区构成,省又由若干市构成。
因此省类内含市的集合,市类内含县区的集合。


/**
 * A province: a name, a postcode and the list of cities that belong to it.
 *
 * <p>Getters and setters are provided so that an instance can be created
 * with the no-arg constructor and filled in step by step (the pull-parser
 * demo in this file builds its objects that way).
 */
public class Provinces {
    private String provinces_name;
    private String provinces_postcode;
    // Despite the field name, this holds the City objects, not their names.
    private List<City> city_name;

    /** Creates an empty province; every field stays {@code null} until set. */
    public Provinces(){}

    /**
     * @param provinces_name province name
     * @param postcode       province postcode
     * @param city_name      cities of the province (stored as passed, not copied)
     */
    public Provinces(String provinces_name,String postcode,List<City> city_name){
        this.provinces_name=provinces_name;
        this.provinces_postcode=postcode;
        this.city_name=city_name;
    }

    /** @return the province name, or {@code null} if not set */
    public String getProvinces_name() {
        return provinces_name;
    }

    /** @param provinces_name the province name */
    public void setProvinces_name(String provinces_name) {
        this.provinces_name = provinces_name;
    }

    /** @return the province postcode, or {@code null} if not set */
    public String getProvinces_postcode() {
        return provinces_postcode;
    }

    /** @param provinces_postcode the province postcode */
    public void setProvinces_postcode(String provinces_postcode) {
        this.provinces_postcode = provinces_postcode;
    }

    /** @return the list of cities, or {@code null} if not set */
    public List<City> getCity_name() {
        return city_name;
    }

    /** @param city_name the list of cities (stored as passed, not copied) */
    public void setCity_name(List<City> city_name) {
        this.city_name = city_name;
    }

    @Override
    public String toString() {
        return "Provinces [provinces_name=" + provinces_name
                + ", provinces_postcode=" + provinces_postcode + ", city_name="
                + city_name + "]";
    }
}


/**
 * A city: a name, a postcode and the list of areas (districts/counties)
 * that belong to it.
 *
 * <p>Getters and setters are provided so that an instance can be created
 * with the no-arg constructor and filled in step by step (the pull-parser
 * demo in this file builds its objects that way).
 */
public class City {
    private String city_name;
    private String city_postcode;
    private List<Area> area;

    /** Creates an empty city; every field stays {@code null} until set. */
    public City(){}

    /**
     * @param name          city name
     * @param city_postcode city postcode
     * @param area          areas of the city (stored as passed, not copied)
     */
    public City(String name,String city_postcode,List<Area> area){
        this.city_name=name;
        this.city_postcode=city_postcode;
        this.area=area;
    }

    /** @return the city name, or {@code null} if not set */
    public String getCity_name() {
        return city_name;
    }

    /** @param city_name the city name */
    public void setCity_name(String city_name) {
        this.city_name = city_name;
    }

    /** @return the city postcode, or {@code null} if not set */
    public String getCity_postcode() {
        return city_postcode;
    }

    /** @param city_postcode the city postcode */
    public void setCity_postcode(String city_postcode) {
        this.city_postcode = city_postcode;
    }

    /** @return the list of areas, or {@code null} if not set */
    public List<Area> getArea() {
        return area;
    }

    /** @param area the list of areas (stored as passed, not copied) */
    public void setArea(List<Area> area) {
        this.area = area;
    }

    @Override
    public String toString() {
        return "City [city_name=" + city_name + ", city_postcode="
                + city_postcode + ", area=" + area + "]";
    }
}


/**
 * An area (district or county): the smallest unit of the region data,
 * holding just a name and a postcode.
 *
 * <p>Getters and setters are provided so that an instance can be created
 * with the no-arg constructor and filled in step by step (the pull-parser
 * demo in this file builds its objects that way).
 */
public class Area {
    private String area_name;
    private String area_postcode;

    /** Creates an empty area; both fields stay {@code null} until set. */
    public Area(){}

    /**
     * @param area_name     area name
     * @param area_postcode area postcode
     */
    public Area(String area_name,String area_postcode){
        this.area_name=area_name;
        this.area_postcode=area_postcode;
    }

    /** @return the area name, or {@code null} if not set */
    public String getArea_name() {
        return area_name;
    }

    /** @param area_name the area name */
    public void setArea_name(String area_name) {
        this.area_name = area_name;
    }

    /** @return the area postcode, or {@code null} if not set */
    public String getArea_postcode() {
        return area_postcode;
    }

    /** @param area_postcode the area postcode */
    public void setArea_postcode(String area_postcode) {
        this.area_postcode = area_postcode;
    }

    @Override
    public String toString() {
        return "Area [area_name=" + area_name + ", area_postcode="
                + area_postcode + "]";
    }
}

以下是DOM解析片段:

/**
 * DOM parsing demo: loads the whole XML document into memory and converts
 * its province / city / area elements into the helper objects.
 */
public class DOMXML {
    /**
     * Parses {@code d://Resource/city.xml} into a list of {@link Provinces}.
     * Any failure is reported by printing the stack trace. The resulting list
     * is not used further; this method only demonstrates the parse.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args){
        List<Provinces> provinces_arrayList=null;
        // Create the DOM parser factory.
        DocumentBuilderFactory dbf=DocumentBuilderFactory.newInstance();
        // try-with-resources closes the file even when parsing fails.
        try (InputStream is=new FileInputStream(new File("d://Resource/city.xml"))) {
            // Obtain a parser from the factory and load the file into a document tree.
            DocumentBuilder db=dbf.newDocumentBuilder();
            Document document=db.parse(is);
            provinces_arrayList=readProvinces(document);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Builds one Provinces object per {@code <province>} element in the document. */
    private static List<Provinces> readProvinces(Document document){
        // Search from the document rather than from the root element:
        // Element.getElementsByTagName only returns descendants, so a file whose
        // root element is itself <province> would otherwise produce no result.
        NodeList provinceNodes=document.getElementsByTagName("province");
        List<Provinces> provinces=new ArrayList<>();
        for(int i=0;i<provinceNodes.getLength();i++){
            // Cast to Element to read attributes by name.
            Element provinceElement=(Element) provinceNodes.item(i);
            provinces.add(new Provinces(
                    provinceElement.getAttribute("name"),
                    provinceElement.getAttribute("postcode"),
                    readCities(provinceElement)));
        }
        return provinces;
    }

    /** Builds one City object per {@code <city>} element below the given province element. */
    private static List<City> readCities(Element provinceElement){
        NodeList cityNodes=provinceElement.getElementsByTagName("city");
        List<City> cities=new ArrayList<>();
        for(int j=0;j<cityNodes.getLength();j++){
            Element cityElement=(Element) cityNodes.item(j);
            cities.add(new City(
                    cityElement.getAttribute("name"),
                    cityElement.getAttribute("postcode"),
                    readAreas(cityElement)));
        }
        return cities;
    }

    /** Builds one Area object per {@code <area>} element below the given city element. */
    private static List<Area> readAreas(Element cityElement){
        NodeList areaNodes=cityElement.getElementsByTagName("area");
        List<Area> areas=new ArrayList<>();
        for(int k=0;k<areaNodes.getLength();k++){
            Element areaElement=(Element) areaNodes.item(k);
            areas.add(new Area(
                    areaElement.getAttribute("name"),
                    areaElement.getAttribute("postcode")));
        }
        return areas;
    }
}

以下是PULL解析片段。PULL解析在遇到开始标签时先用无参构造方法实例化对象,再逐个设置属性值,因此辅助类需要提供setter和getter方法。

/**
 * Pull parsing demo: walks the XML as a stream of events, creates a helper
 * object on each start tag and attaches it to its parent on the matching
 * end tag, then writes the result to a text file.
 */
public class PULLXML {
    /**
     * Parses {@code d://Resource/city.xml} and writes the string form of the
     * resulting province list to {@code d:he.txt}. Any failure is reported by
     * printing the stack trace; nothing is written if parsing fails.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args){
        List<Provinces> province_list=new ArrayList<>();
        // The object currently being built at each level, plus the child list
        // that is collected for it until its end tag is reached.
        List<City> city_list=null;
        List<Area> area_list=null;
        Provinces province=null;
        City city=null;
        Area area=null;

        // try-with-resources closes the input file even when parsing fails.
        try (InputStream is=new FileInputStream(new File("d://Resource/city.xml"))) {
            // Obtain a pull parser from its factory.
            XmlPullParserFactory  xmlPullParserFactory=XmlPullParserFactory.newInstance();
            XmlPullParser xmlPullParser=xmlPullParserFactory.newPullParser();
            // Set the input stream and its character encoding.
            xmlPullParser.setInput(is,"utf-8");
            // Current event type.
            int eventType=xmlPullParser.getEventType();
            // Keep going until the end of the document is reached.
            while(eventType!=XmlPullParser.END_DOCUMENT){
                switch(eventType){
                case XmlPullParser.START_TAG:
                    // A start tag opens a new object and a fresh list for its children.
                    String st=xmlPullParser.getName();
                    if("province".equals(st)){
                        province=new Provinces();
                        province.setProvinces_name(xmlPullParser.getAttributeValue(null,"name"));
                        province.setProvinces_postcode(xmlPullParser.getAttributeValue(null, "postcode"));
                        city_list=new ArrayList<>();
                    }else if("city".equals(st)){
                        city=new City();
                        city.setCity_name(xmlPullParser.getAttributeValue(null, "name"));
                        city.setCity_postcode(xmlPullParser.getAttributeValue(null, "postcode"));
                        area_list=new ArrayList<>();
                    }else if("area".equals(st)){
                        area=new Area();
                        area.setArea_name(xmlPullParser.getAttributeValue(null,"name"));
                        area.setArea_postcode(xmlPullParser.getAttributeValue(null, "postcode"));
                    }
                    break;

                case XmlPullParser.END_TAG:
                    // An end tag completes the object and hands it to its parent's list.
                    String str=xmlPullParser.getName();
                    if("province".equals(str)){
                        province.setCity_name(city_list);
                        province_list.add(province);
                    }else if("city".equals(str)){
                        city.setArea(area_list);
                        city_list.add(city);
                    }else if("area".equals(str)){
                        area_list.add(area);
                    }
                    break;

                default:
                    // START_DOCUMENT, TEXT and other events carry nothing needed here.
                    break;
                }
                eventType=xmlPullParser.next();
            }
            // The output file is opened only after a successful parse, and is
            // closed by try-with-resources.
            // NOTE(review): "d:he.txt" is relative to the current directory of
            // drive d:, not the drive root - confirm this is intended.
            try (FileOutputStream fos=new FileOutputStream(new File("d:he.txt"))) {
                // Encode explicitly as utf-8 (same as the input) instead of the
                // platform default, so the output does not depend on the machine.
                fos.write(province_list.toString().getBytes("utf-8"));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值