DOM4J 同时支持以 DOM 树和 SAX 两种方式读写 XML 文件。当需要生成几百 MB 甚至上 GB 的 XML 文件时,可以像本文这样结合使用 dom4j 的 SAX 与 DOM 方式:既能降低内存占用,又能降低操作 XML 的复杂度。
/**
 * Writes a very large XML file by streaming: the root element is opened once,
 * a single in-memory {@code user} template element is written 7,000,000 times,
 * and then the root is closed. Heap usage is logged every 1,000,000 elements.
 *
 * <p>The output is written through an {@code OutputStream} so that the bytes on
 * disk are encoded with the encoding configured on the {@code OutputFormat}
 * ("UTF-8") instead of the platform default charset used by {@code FileWriter}.
 *
 * @throws Exception if the file cannot be opened or any write fails; failures
 *                   propagate so the test does not pass silently
 */
@Test
public void bigXmlForSax() throws Exception {
    String filePath = ""; // fill in your own output path

    Document document = DocumentHelper.createDocument();
    Element root = document.addElement("users");
    root.addAttribute("name", "users");

    // Template element that is written repeatedly; it is the only child kept in memory.
    Element user = DocumentHelper.createElement("user");
    user.addAttribute("name", "username");
    user.addElement("id").addAttribute("name", "userId").setText("this is user's ID number");
    // user.setParent(root); // not needed: when streaming with XMLWriter the parent link can be omitted

    OutputFormat prettyPrint = OutputFormat.createPrettyPrint();
    prettyPrint.setEncoding("UTF-8");

    // Fully-qualified java.io names are used because no import block is visible in this file.
    try (java.io.OutputStream out = new java.io.FileOutputStream(filePath)) {
        // XMLWriter(OutputStream, OutputFormat) encodes with the format's encoding.
        XMLWriter writer = new XMLWriter(out, prettyPrint);
        writer.startDocument();
        writer.writeOpen(root);
        for (int i = 1; i <= 7000000; i++) {
            writer.write(user);
            if (i % 1000000 == 0) {
                GeneralUtil.printMemoryInfo();
            }
        }
        writer.writeClose(root);
        writer.endDocument();
        // Push buffered characters to the stream before try-with-resources closes it.
        writer.flush();
    }
}
/**
 * Small diagnostics helper for logging JVM heap figures.
 */
@Slf4j
public class GeneralUtil {
    /**
     * Logs the JVM's total memory, the used portion (total minus free), both in
     * whole mebibytes, and the free memory as a percentage of total memory.
     */
    public static void printMemoryInfo() {
        final long bytesPerMb = 1024L * 1024L;
        Runtime runtime = Runtime.getRuntime();
        long total = runtime.totalMemory();
        long free = runtime.freeMemory();
        long used = total - free;
        double freePercent = ((double) free / total) * 100;

        log.info("总内存:{} M", total / bytesPerMb);
        log.info("已使用:{} M", used / bytesPerMb);
        log.info("剩余内存百分比:{} %", freePercent);
    }
}
参考
http://tankmarshal.iteye.com/blog/1622490
https://github.com/dom4j/dom4j/blob/version-2.0.2/src/test/java/org/dom4j/XMLWriterTest.java