对于普通大小的xml解析,现在有许多类库能很好地支持,比如dom4j,JDOM等;由于它们已经做过一层封装,用起来也很方便。
当然对于这样的xml解析没有什么问题,也不必花费笔墨去探讨了。
前两天遇到一个300多MB的超大xml文件需要解析,基本上用什么编辑器打开,什么编辑器就崩溃。
如果用dom4j这类解析方式,在解析之前需要先把整个文件加载到内存,并在内存中构建完整的文档树:
// dom4j reads the whole file and hands it back as a single Document object.
Document document = new SAXReader().read(new File(fileName));
由于文件超大,整个文档树都要放进内存,基本上什么服务器都会OutOfMemory(内存溢出),实在不给力,结果也可想而知。
后来改用了SAX处理方式:解析一点文件就读取一点到内存,不需要一次性加载整个文件,这样就不会再出现内存溢出的情况,具体请看代码:
// Handler that receives the SAX callbacks while the document is being read.
AnalyticElongHotelXml analyticElongHotelXml = new AnalyticElongHotelXml(elongHotelsDAO);
// Obtain a SAX parser from the default factory.
SAXParser sp = SAXParserFactory.newInstance().newSAXParser();
import com.mm.components.elong.dao.ElongHotelsDAO;
import com.mm.components.elong.domain.ElongHotels;
import com.mm.components.elong.domain.ElongImages;
import com.mm.components.elong.domain.ElongRooms;
import com.mm.exception.MMException;
import com.mm.util.StringUtils;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* 解析elong Hotel_cn.xml文件
* <p>Title: </p>
* <p>Description: </p>
* <p>Copyright: Copyright (c) 2011</p>
* <p>Company: Yododo</p>
* <p>Date: 2011-2-15</p>
* <p>Time: 15:01:14</p>
*
* @author eng
* $Header: $
* $Revision: $
* $Author: $
*/
public class AnalyticElongHotelXml extends DefaultHandler {
private ElongHotelsDAO elongHotelsDAO = null;
java.util.Stack tags = new java.util.Stack();
private String fatherQName = "";
private boolean isFromEndElement = false;
private static List<ElongHotels> hotels = null;
private static List<ElongImages> images = null;
private static List<ElongRooms> rooms = null;
private ElongHotels hotel = null;
private ElongImages image = null;
private ElongRooms room = null;
public AnalyticElongHotelXml() {
super();
}
public AnalyticElongHotelXml(ElongHotelsDAO elongHotelsDAO) {
super();
this.elongHotelsDAO = elongHotelsDAO;
}
@Override
public void characters(char ch[], int start, int length) throws SAXException {
String tag = (String) tags.peek();
String value = new String(ch, start, length);
if(isFromEndElement){
if(fatherQName.equals("image")){
initImageMsg(tag,value);
}else if(fatherQName.equals("airportPickUpService")){
}else if(fatherQName.equals("generalAmenities")){
if("Overview".equals(tag)){
hotel.setGeneralamenities(value);
}
}
}
}
@Override
public void startElement(String uri,String localName,String qName, Attributes attrs) {
tags.push(qName);
isFromEndElement = true;
if(qName.equals("HotelDetail")){
hotel = new ElongHotels();
}else if(qName.equals("image")){
fatherQName = "image";
image = new ElongImages();
}
}
@Override
public void endElement(String uri,String localName,String qName)
throws SAXException {
isFromEndElement = false;
if(qName.equals("HotelDetail")){
hotel.setImgurl(getoneimage());
hotels.add(hotel);
if(qName.equals("image")){
image.setHotelid(hotel.getHotelid());
images.add(image);
fatherQName = "";
image = null;
}else if(qName.equals("airportPickUpService")){
fatherQName = "";
}
}
private void initHotelMsg(String tag , String value){
if("id".equals(tag)){
hotel.setHotelid(value);
}else if("dateUpdated".equals(tag)){
}else if("name".equals(tag)){
hotel.setName(value);
}
}
private void initImageMsg(String tag , String value){
if("imgUrl".equals(tag)){
image.setImgurl(value);
}else if("imgType".equals(tag)){
image.setImgtype(Integer.parseInt(value));
}else if("title".equals(tag)){
image.setTitle(value);
}
}
private void initRoomMsg(String tag , String value){
if("roomTypeId".equals(tag)){
room.setRoomtypeid(value);
}else if("roomName".equals(tag)){
room.setRoomname(value);
}else if("roomTypeNum".equals(tag)){
room.setRoomtypenum(Integer.parseInt(value));
}
}
public static List<ElongRooms> getRooms() {
return rooms;
}
public static List<ElongImages> getImages() {
return images;
}
public static List<ElongHotels> getHotels() {
return hotels;
}
public static void main(String args[]) {
long lasting = System.currentTimeMillis();
try {
SAXParserFactory sf = SAXParserFactory.newInstance();
SAXParser sp = sf.newSAXParser();
AnalyticElongHotelXml reader = new AnalyticElongHotelXml();
sp.parse(new InputSource("D:\\Projects\\tour\\src\\com\\elong\\message\\Hotel_cn.xml"), reader);
System.out.println("success");
for(ElongHotels s : hotels){
if(s.getHotelid().equals("03")){
System.out.println(s.getName());
}
}
} catch (Exception e) {
e.printStackTrace();
}
System.out.println("运行时间:" + (System.currentTimeMillis() - lasting) + "毫秒");
}
}
sp.parse(new InputSource("D:\\Projects\\tour\\src\\com\\elong\\message\\Hotel_cn.xml"),analyticElongHotelXml);
我们可以重写DefaultHandler类的characters,startElement,endElement等方法,在里面实现自己需要的业务逻辑。需要注意的是,SAX解析器可能把同一个元素的文本分多次通过characters回调传入,因此应当先把文本累积起来,等元素结束时再统一处理。