原XML文件含有31个省,由于数据太多,这里只放一个有代表性的广东省,其他省的结构与之一致。(完整文件中,所有<province>元素都包含在同一个根元素之内,此处省略了根元素。)
<province name="广东省" postcode="440000" >
<city name="广州市" postcode="440100" >
<area name="荔湾区" postcode="440103" />
<area name="越秀区" postcode="440104" />
<area name="海珠区" postcode="440105" />
<area name="天河区" postcode="440106" />
<area name="白云区" postcode="440111" />
<area name="黄埔区" postcode="440112" />
<area name="番禺区" postcode="440113" />
<area name="花都区" postcode="440114" />
<area name="南沙区" postcode="440115" />
<area name="萝岗区" postcode="440116" />
<area name="增城市" postcode="440183" />
<area name="从化市" postcode="440184" />
</city>
<city name="韶关市" postcode="440200" >
<area name="武江区" postcode="440203" />
<area name="浈江区" postcode="440204" />
<area name="曲江区" postcode="440205" />
<area name="始兴县" postcode="440222" />
<area name="仁化县" postcode="440224" />
<area name="翁源县" postcode="440229" />
<area name="乳源瑶族自治县" postcode="440232" />
<area name="新丰县" postcode="440233" />
<area name="乐昌市" postcode="440281" />
<area name="南雄市" postcode="440282" />
</city>
<city name="深圳市" postcode="440300" >
<area name="罗湖区" postcode="440303" />
<area name="福田区" postcode="440304" />
<area name="南山区" postcode="440305" />
<area name="宝安区" postcode="440306" />
<area name="龙岗区" postcode="440307" />
<area name="盐田区" postcode="440308" />
</city>
<city name="珠海市" postcode="440400" >
<area name="香洲区" postcode="440402" />
<area name="斗门区" postcode="440403" />
<area name="金湾区" postcode="440404" />
</city>
<city name="汕头市" postcode="440500" >
<area name="龙湖区" postcode="440507" />
<area name="金平区" postcode="440511" />
<area name="濠江区" postcode="440512" />
<area name="潮阳区" postcode="440513" />
<area name="潮南区" postcode="440514" />
<area name="澄海区" postcode="440515" />
<area name="南澳县" postcode="440523" />
</city>
<city name="佛山市" postcode="440600" >
<area name="禅城区" postcode="440604" />
<area name="南海区" postcode="440605" />
<area name="顺德区" postcode="440606" />
<area name="三水区" postcode="440607" />
<area name="高明区" postcode="440608" />
</city>
<city name="江门市" postcode="440700" >
<area name="蓬江区" postcode="440703" />
<area name="江海区" postcode="440704" />
<area name="新会区" postcode="440705" />
<area name="台山市" postcode="440781" />
<area name="开平市" postcode="440783" />
<area name="鹤山市" postcode="440784" />
<area name="恩平市" postcode="440785" />
</city>
<city name="湛江市" postcode="440800" >
<area name="赤坎区" postcode="440802" />
<area name="霞山区" postcode="440803" />
<area name="坡头区" postcode="440804" />
<area name="麻章区" postcode="440811" />
<area name="遂溪县" postcode="440823" />
<area name="徐闻县" postcode="440825" />
<area name="廉江市" postcode="440881" />
<area name="雷州市" postcode="440882" />
<area name="吴川市" postcode="440883" />
</city>
<city name="茂名市" postcode="440900" >
<area name="茂南区" postcode="440902" />
<area name="茂港区" postcode="440903" />
<area name="电白县" postcode="440923" />
<area name="高州市" postcode="440981" />
<area name="化州市" postcode="440982" />
<area name="信宜市" postcode="440983" />
</city>
<city name="肇庆市" postcode="441200" >
<area name="端州区" postcode="441202" />
<area name="鼎湖区" postcode="441203" />
<area name="广宁县" postcode="441223" />
<area name="怀集县" postcode="441224" />
<area name="封开县" postcode="441225" />
<area name="德庆县" postcode="441226" />
<area name="高要市" postcode="441283" />
<area name="四会市" postcode="441284" />
</city>
<city name="惠州市" postcode="441300" >
<area name="惠城区" postcode="441302" />
<area name="惠阳区" postcode="441303" />
<area name="博罗县" postcode="441322" />
<area name="惠东县" postcode="441323" />
<area name="龙门县" postcode="441324" />
</city>
<city name="梅州市" postcode="441400" >
<area name="梅江区" postcode="441402" />
<area name="梅县" postcode="441421" />
<area name="大埔县" postcode="441422" />
<area name="丰顺县" postcode="441423" />
<area name="五华县" postcode="441424" />
<area name="平远县" postcode="441426" />
<area name="蕉岭县" postcode="441427" />
<area name="兴宁市" postcode="441481" />
</city>
<city name="汕尾市" postcode="441500" >
<area name="城区" postcode="441502" />
<area name="海丰县" postcode="441521" />
<area name="陆河县" postcode="441523" />
<area name="陆丰市" postcode="441581" />
</city>
<city name="河源市" postcode="441600" >
<area name="源城区" postcode="441602" />
<area name="紫金县" postcode="441621" />
<area name="龙川县" postcode="441622" />
<area name="连平县" postcode="441623" />
<area name="和平县" postcode="441624" />
<area name="东源县" postcode="441625" />
</city>
<city name="阳江市" postcode="441700" >
<area name="江城区" postcode="441702" />
<area name="阳西县" postcode="441721" />
<area name="阳东县" postcode="441723" />
<area name="阳春市" postcode="441781" />
</city>
<city name="清远市" postcode="441800" >
<area name="清城区" postcode="441802" />
<area name="佛冈县" postcode="441821" />
<area name="阳山县" postcode="441823" />
<area name="连山壮族瑶族自治县" postcode="441825" />
<area name="连南瑶族自治县" postcode="441826" />
<area name="清新县" postcode="441827" />
<area name="英德市" postcode="441881" />
<area name="连州市" postcode="441882" />
</city>
<city name="东莞市" postcode="441900" >
<area name="市辖区" postcode="441901" />
</city>
<city name="中山市" postcode="442000" >
<area name="市辖区" postcode="442001" />
</city>
<city name="潮州市" postcode="445100" >
<area name="湘桥区" postcode="445102" />
<area name="潮安县" postcode="445121" />
<area name="饶平县" postcode="445122" />
</city>
<city name="揭阳市" postcode="445200" >
<area name="榕城区" postcode="445202" />
<area name="揭东县" postcode="445221" />
<area name="揭西县" postcode="445222" />
<area name="惠来县" postcode="445224" />
<area name="普宁市" postcode="445281" />
</city>
<city name="云浮市" postcode="445300" >
<area name="云城区" postcode="445302" />
<area name="新兴县" postcode="445321" />
<area name="郁南县" postcode="445322" />
<area name="云安县" postcode="445323" />
<area name="罗定市" postcode="445381" />
</city>
</province>
根据原XML文件结构,构建辅助类。
原结构最小单元是县区,市级由若干县区构成,省又由若干市级构成。
因此构建省类内含市级集合,市内含有县区集合。
/**
 * One {@code <province>} element of the region XML: its name, its postcode and
 * the cities nested inside it.
 *
 * <p>This is a plain mutable bean. The all-args constructor suits parsers that
 * have every value at hand before building the object; the no-arg constructor
 * together with the setters suits event-driven parsing, where the object is
 * created at the start tag and filled in as parsing proceeds.
 */
public class Provinces {
    /** Value of the province's {@code name} attribute. */
    private String provinces_name;
    /** Value of the province's {@code postcode} attribute. */
    private String provinces_postcode;
    /** Cities belonging to this province (despite the field name, this is a list of {@code City}). */
    private List<City> city_name;

    /** Creates an empty province whose fields are all {@code null}. */
    public Provinces() {
    }

    /**
     * Creates a fully populated province.
     *
     * @param provinces_name province name
     * @param postcode       province postcode
     * @param city_name      cities contained in the province; stored as-is, not copied
     */
    public Provinces(String provinces_name, String postcode, List<City> city_name) {
        this.provinces_name = provinces_name;
        this.provinces_postcode = postcode;
        this.city_name = city_name;
    }

    /** @return the province name, or {@code null} if not set */
    public String getProvinces_name() {
        return provinces_name;
    }

    /** @param provinces_name the province name to store */
    public void setProvinces_name(String provinces_name) {
        this.provinces_name = provinces_name;
    }

    /** @return the province postcode, or {@code null} if not set */
    public String getProvinces_postcode() {
        return provinces_postcode;
    }

    /** @param provinces_postcode the province postcode to store */
    public void setProvinces_postcode(String provinces_postcode) {
        this.provinces_postcode = provinces_postcode;
    }

    /** @return the list of cities, or {@code null} if not set; the internal list itself is returned */
    public List<City> getCity_name() {
        return city_name;
    }

    /** @param city_name the list of cities to store; stored as-is, not copied */
    public void setCity_name(List<City> city_name) {
        this.city_name = city_name;
    }

    @Override
    public String toString() {
        return "Provinces [provinces_name=" + provinces_name
                + ", provinces_postcode=" + provinces_postcode + ", city_name="
                + city_name + "]";
    }
}
/**
 * One {@code <city>} element of the region XML: its name, its postcode and the
 * areas (districts/counties) nested inside it.
 *
 * <p>This is a plain mutable bean. The all-args constructor suits parsers that
 * have every value at hand before building the object; the no-arg constructor
 * together with the setters suits event-driven parsing, where the object is
 * created at the start tag and filled in as parsing proceeds.
 */
public class City {
    /** Value of the city's {@code name} attribute. */
    private String city_name;
    /** Value of the city's {@code postcode} attribute. */
    private String city_postcode;
    /** Areas belonging to this city. */
    private List<Area> area;

    /** Creates an empty city whose fields are all {@code null}. */
    public City() {
    }

    /**
     * Creates a fully populated city.
     *
     * @param name          city name
     * @param city_postcode city postcode
     * @param area          areas contained in the city; stored as-is, not copied
     */
    public City(String name, String city_postcode, List<Area> area) {
        this.city_name = name;
        this.city_postcode = city_postcode;
        this.area = area;
    }

    /** @return the city name, or {@code null} if not set */
    public String getCity_name() {
        return city_name;
    }

    /** @param city_name the city name to store */
    public void setCity_name(String city_name) {
        this.city_name = city_name;
    }

    /** @return the city postcode, or {@code null} if not set */
    public String getCity_postcode() {
        return city_postcode;
    }

    /** @param city_postcode the city postcode to store */
    public void setCity_postcode(String city_postcode) {
        this.city_postcode = city_postcode;
    }

    /** @return the list of areas, or {@code null} if not set; the internal list itself is returned */
    public List<Area> getArea() {
        return area;
    }

    /** @param area the list of areas to store; stored as-is, not copied */
    public void setArea(List<Area> area) {
        this.area = area;
    }

    @Override
    public String toString() {
        return "City [city_name=" + city_name + ", city_postcode="
                + city_postcode + ", area=" + area + "]";
    }
}
/**
 * One {@code <area>} element of the region XML (the smallest unit in the
 * hierarchy): its name and its postcode.
 *
 * <p>This is a plain mutable bean. The all-args constructor suits parsers that
 * have both values at hand before building the object; the no-arg constructor
 * together with the setters suits event-driven parsing, where the object is
 * created at the start tag and filled in afterwards.
 */
public class Area {
    /** Value of the area's {@code name} attribute. */
    private String area_name;
    /** Value of the area's {@code postcode} attribute. */
    private String area_postcode;

    /** Creates an empty area whose fields are both {@code null}. */
    public Area() {
    }

    /**
     * Creates a fully populated area.
     *
     * @param area_name     area name
     * @param area_postcode area postcode
     */
    public Area(String area_name, String area_postcode) {
        this.area_name = area_name;
        this.area_postcode = area_postcode;
    }

    /** @return the area name, or {@code null} if not set */
    public String getArea_name() {
        return area_name;
    }

    /** @param area_name the area name to store */
    public void setArea_name(String area_name) {
        this.area_name = area_name;
    }

    /** @return the area postcode, or {@code null} if not set */
    public String getArea_postcode() {
        return area_postcode;
    }

    /** @param area_postcode the area postcode to store */
    public void setArea_postcode(String area_postcode) {
        this.area_postcode = area_postcode;
    }

    @Override
    public String toString() {
        return "Area [area_name=" + area_name + ", area_postcode="
                + area_postcode + "]";
    }
}
以下是DOM解析片段:
/**
 * Demonstrates DOM parsing of the province / city / area XML file into the
 * {@code Provinces}, {@code City} and {@code Area} helper classes.
 */
public class DOMXML {
    /**
     * Loads {@code d://Resource/city.xml} into a DOM tree and walks it three
     * levels deep (province, city, area), building one {@code Provinces} object
     * per {@code <province>} element found beneath the document's root element.
     *
     * <p>The resulting list is only built; it is neither printed nor returned.
     * Any exception raised while reading or parsing is printed to standard error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<Provinces> provinces_arrayList = null;
        // Create the DOM parser factory.
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        try {
            // Obtain a parser from the factory.
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document document;
            // try-with-resources closes the file as soon as the whole document has
            // been read into memory, even when parsing throws.
            try (InputStream is = new FileInputStream(new File("d://Resource/city.xml"))) {
                document = db.parse(is);
            }
            // The root element of the document.
            Element root = document.getDocumentElement();
            // All <province> elements below the root (descendants only; the root
            // element itself is not part of the result).
            NodeList provinces_nodeList = root.getElementsByTagName("province");
            int len = provinces_nodeList.getLength();

            // ---- provinces ----
            provinces_arrayList = new ArrayList<>();
            for (int i = 0; i < len; i++) {
                // Cast to Element so that getAttribute(String) is available.
                Element province_element = (Element) provinces_nodeList.item(i);
                String province_name = province_element.getAttribute("name");
                String province_postcode = province_element.getAttribute("postcode");
                NodeList child_node = province_element.getElementsByTagName("city");

                // ---- cities of this province ----
                List<City> city_arrayList = new ArrayList<>();
                for (int j = 0; j < child_node.getLength(); j++) {
                    Element city_element = (Element) child_node.item(j);
                    String city_name = city_element.getAttribute("name");
                    String city_postcode = city_element.getAttribute("postcode");
                    NodeList area_list = city_element.getElementsByTagName("area");

                    // ---- areas of this city ----
                    List<Area> area_arrayList = new ArrayList<>();
                    for (int k = 0; k < area_list.getLength(); k++) {
                        Element area_element = (Element) area_list.item(k);
                        String area_name = area_element.getAttribute("name");
                        String area_postcode = area_element.getAttribute("postcode");
                        area_arrayList.add(new Area(area_name, area_postcode));
                    }

                    city_arrayList.add(new City(city_name, city_postcode, area_arrayList));
                }

                provinces_arrayList.add(new Provinces(province_name, province_postcode, city_arrayList));
            }
        } catch (Exception e) {
            // Demo code: report the failure and fall through to the end of main.
            e.printStackTrace();
        }
    }
}
以下是PULL解析片段。PULL解析在遇到开始标签时先用无参构造方法实例化对象,再逐个设置属性值,因此辅助类需要提供setter和getter方法。
/**
 * Demonstrates pull parsing of the province / city / area XML file into the
 * {@code Provinces}, {@code City} and {@code Area} helper classes.
 */
public class PULLXML {
    /**
     * Reads {@code d://Resource/city.xml} event by event. An object is created
     * at each start tag and attached to its parent's list at the matching end
     * tag. After parsing, the string form of the resulting province list is
     * written to {@code d:he.txt}.
     *
     * <p>Any exception raised while parsing or writing is printed to standard
     * error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<Provinces> province_list = new ArrayList<>();
        // "Current" objects and lists; each is replaced when the corresponding
        // start tag is seen.
        List<City> city_list = null;
        List<Area> area_list = null;
        Provinces province = null;
        City city = null;
        Area area = null;
        try {
            // Obtain the pull parser factory, then a parser from it.
            XmlPullParserFactory xmlPullParserFactory = XmlPullParserFactory.newInstance();
            XmlPullParser xmlPullParser = xmlPullParserFactory.newPullParser();

            // try-with-resources closes the input file even if parsing throws.
            try (InputStream is = new FileInputStream(new File("d://Resource/city.xml"))) {
                // Set the input and its encoding.
                xmlPullParser.setInput(is, "utf-8");
                // Type of the current parser event.
                int eventType = xmlPullParser.getEventType();
                // Keep going until the end of the document is reached.
                while (eventType != XmlPullParser.END_DOCUMENT) {
                    switch (eventType) {
                    case XmlPullParser.START_DOCUMENT:
                        break;
                    case XmlPullParser.START_TAG:
                        String st = xmlPullParser.getName();
                        if ("province".equals(st)) {
                            province = new Provinces();
                            province.setProvinces_name(xmlPullParser.getAttributeValue(null, "name"));
                            province.setProvinces_postcode(xmlPullParser.getAttributeValue(null, "postcode"));
                            // Fresh city list for this province.
                            city_list = new ArrayList<>();
                        } else if ("city".equals(st)) {
                            city = new City();
                            city.setCity_name(xmlPullParser.getAttributeValue(null, "name"));
                            city.setCity_postcode(xmlPullParser.getAttributeValue(null, "postcode"));
                            // Fresh area list for this city.
                            area_list = new ArrayList<>();
                        } else if ("area".equals(st)) {
                            area = new Area();
                            area.setArea_name(xmlPullParser.getAttributeValue(null, "name"));
                            area.setArea_postcode(xmlPullParser.getAttributeValue(null, "postcode"));
                        }
                        break;
                    case XmlPullParser.TEXT:
                        break;
                    case XmlPullParser.END_TAG:
                        String str = xmlPullParser.getName();
                        if ("province".equals(str)) {
                            province.setCity_name(city_list);
                            province_list.add(province);
                        } else if ("city".equals(str)) {
                            city.setArea(area_list);
                            city_list.add(city);
                        } else if ("area".equals(str)) {
                            area_list.add(area);
                        }
                        break;
                    }
                    eventType = xmlPullParser.next();
                }
            }

            // NOTE(review): "d:he.txt" has no separator after the drive letter, so
            // it looks like a drive-relative path rather than d:/he.txt; confirm
            // this is intended.
            try (FileOutputStream fos = new FileOutputStream(new File("d:he.txt"))) {
                // Encode explicitly: the input was read as utf-8, and the platform
                // default charset may be unable to represent the Chinese names.
                fos.write(province_list.toString().getBytes("utf-8"));
            }
        } catch (Exception e) {
            // Demo code: report the failure and fall through to the end of main.
            e.printStackTrace();
        }
    }
}