工作中需要处理30M左右的XML文件。测试了一下dom4j,最多只能解析10M多一点的文件,到11M就会 out of memory,于是最后选择直接用SAX进行解析:
ReadXMLFileSAX类:
package xml;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Minimal SAX demo: streams {@code test.xml} through a {@link DefaultHandler}
 * and prints every element start/end, every attribute, and the text of the
 * {@code firstname}, {@code lastname}, {@code nickname} and {@code salary}
 * elements to standard output.
 */
public class ReadXMLFileSAX {

    /**
     * Parses {@code test.xml} (resolved by the parser from the given system id)
     * and prints the events to {@code System.out}. Any exception raised while
     * configuring the parser or parsing is printed via {@code printStackTrace()}.
     *
     * @param argv command-line arguments; not used
     */
    public static void main(String[] argv) {
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            SAXParser saxParser = factory.newSAXParser();

            DefaultHandler handler = new DefaultHandler() {
                // Each flag is raised in startElement and consumed by the next
                // characters() callback, which prints the text and clears it.
                boolean bfname = false;
                boolean blname = false;
                boolean bnname = false;
                boolean bsalary = false;
                // Never set to true anywhere in this class, so the matching
                // branch in characters() does not fire.
                boolean bstaff = false;

                /**
                 * Prints the element name and its attributes, and raises the
                 * flag for the elements whose text should be printed.
                 */
                @Override
                public void startElement(String uri, String localName,
                        String qName, Attributes attributes)
                        throws SAXException {
                    System.out.println("Start Element :" + qName);
                    if (qName.equalsIgnoreCase("firstname")) {
                        bfname = true;
                    }
                    if (qName.equalsIgnoreCase("lastname")) {
                        blname = true;
                    }
                    if (qName.equalsIgnoreCase("nickname")) {
                        bnname = true;
                    }
                    if (qName.equalsIgnoreCase("salary")) {
                        // Fix: this used to set bnname, so salary text was
                        // printed under the "Nick Name" label.
                        bsalary = true;
                    }
                    for (int i = 0; i < attributes.getLength(); i++) {
                        System.out.println("attribute name:"
                                + attributes.getQName(i));
                        System.out.println("attribute value:"
                                + attributes.getValue(i));
                    }
                }

                /** Prints the name of the element that just closed. */
                @Override
                public void endElement(String uri, String localName, String qName)
                        throws SAXException {
                    System.out.println("End Element :" + qName);
                }

                /**
                 * Prints the text chunk for whichever element flag is raised,
                 * then clears that flag. Only the first chunk delivered after
                 * the start tag is printed, because the flag is cleared
                 * immediately.
                 */
                @Override
                public void characters(char[] ch, int start, int length)
                        throws SAXException {
                    if (bfname) {
                        System.out.println("description : " + new String(ch, start, length));
                        bfname = false;
                    }
                    if (blname) {
                        System.out.println("orderContent : " + new String(ch, start, length));
                        blname = false;
                    }
                    if (bnname) {
                        System.out.println("Nick Name : " + new String(ch, start, length));
                        bnname = false;
                    }
                    if (bsalary) {
                        System.out.println("Salary : " + new String(ch, start, length));
                        bsalary = false;
                    }
                    if (bstaff) {
                        System.out.println(new String(ch, start, length));
                        bstaff = false;
                    }
                }
            };

            saxParser.parse("test.xml", handler);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
xml文件:
<?xml version="1.0"?> <company> <staff name="aaaaa"> <firstname>yong</firstname> <lastname>mook kim</lastname> <nickname>mkyong</nickname> <salary>100000</salary> </staff> <staff name="bbbbbb"> <firstname>low</firstname> <lastname>yin fong</lastname> <nickname>fong fong</nickname> <salary>200000</salary> </staff> </company>
注:可以在startElement方法的attributes参数中获取element的属性信息,然后在endElement中让element与它的attribute对应起来。由于xml文件很大,尽量不要把解析出的所有信息都放在内存中;譬如每收集到一定数量的信息就插入数据库,然后清空集合,再继续后面的解析。