上个月工作中,需要解析一个超大的 XML 文件。按照以往的做法先用 JDOM 试了一下,结果 JVM 内存溢出了(JDOM 会把整个文档一次性加载到内存中)。没办法,只好上网向万能的网友们求助。花了将近一上午的时间,终于写好了一个实用、符合编码习惯的基于 SAX 的解析工具,在这里分享给大家。
需要引入的 Maven 依赖(第三段代码 User 实体类上的 Jackson、Spring 注解还需要另外引入各自的依赖):
<dependency>
<groupId>javax.persistence</groupId>
<artifactId>persistence-api</artifactId>
<version>1.0.2</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.4</version>
</dependency>
<dependency>
<groupId>org.jdom</groupId>
<artifactId>jdom</artifactId>
<version>2.0.2</version>
</dependency>
第一段代码:基于 SAX 的自定义解析处理器 MyXmlHandler。
package com.kernel.util;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;
/**
 * Custom SAX-based XML parsing tool.
 * Last updated: 2019/9/6
 *
 * <p>For every element whose qualified name matches the record name (ignoring case) one instance
 * of the target class is created. While that element is open, each closing element whose name
 * matches a declared field of the target class (ignoring case) has its text converted and assigned
 * to that field. Finished instances are collected and returned by {@link #getList()}.
 *
 * <p>The handler can be reused for several documents: the result list and the parsing state are
 * reset in {@link #startDocument()}.
 */
public class MyXmlHandler extends DefaultHandler {
    /** Class instantiated (through its no-argument constructor) for each record element. */
    private Class<?> clazz;
    /** Qualified name of the element that delimits one record; compared ignoring case. */
    private String rootName;
    /** Declared fields of {@link #clazz}, in the order returned by {@code getDeclaredFields()}. */
    private final List<Field> fieldList = new ArrayList<>();
    /** Records collected from the current (or most recently parsed) document. */
    private List<Object> list = new ArrayList<>();
    /** Record currently being populated; {@code null} while outside a record element. */
    private Object entity;
    /** Name of the most recently opened element; reset to {@code null} when any element closes. */
    private String element;
    /** Text of the current element; the parser may deliver it in several characters() chunks. */
    private final StringBuilder value = new StringBuilder();

    /**
     * Creates a handler whose target class is the runtime class of {@code entity} and whose
     * record element name is that class's simple name.
     *
     * @param entity sample object; only its class is used
     */
    public MyXmlHandler(Object entity) {
        initHandler(entity.getClass());
    }

    /**
     * Creates a handler whose record element name is the simple name of {@code clazz}.
     *
     * @param clazz class to instantiate for each record
     */
    public MyXmlHandler(Class<?> clazz) {
        initHandler(clazz);
    }

    /**
     * Creates a handler for the runtime class of {@code entity} with an explicit record element.
     *
     * @param entity   sample object; only its class is used
     * @param rootName name of the element that delimits one record
     */
    public MyXmlHandler(Object entity, String rootName) {
        initHandler(entity.getClass(), rootName);
    }

    /**
     * Creates a handler for {@code clazz} with an explicit record element.
     *
     * @param clazz    class to instantiate for each record
     * @param rootName name of the element that delimits one record
     */
    public MyXmlHandler(Class<?> clazz, String rootName) {
        this.initHandler(clazz, rootName);
    }

    /** Initialises the handler using the class's simple name as the record element name. */
    private void initHandler(Class<?> clazz) {
        initHandler(clazz, clazz.getSimpleName());
    }

    /** Stores the target class and record name and caches the class's declared fields. */
    private void initHandler(Class<?> clazz, String rootName) {
        this.clazz = clazz;
        this.rootName = rootName;
        for (Field f : clazz.getDeclaredFields()) {
            fieldList.add(f);
        }
    }

    /** Starts a fresh result list and clears any state left over from a previous parse. */
    @Override
    public void startDocument() throws SAXException {
        super.startDocument();
        list = new ArrayList<>();
        entity = null;
        element = null;
        value.setLength(0);
    }

    /**
     * Nothing to release here: the cached field metadata is kept on purpose so that the same
     * handler instance can parse further documents.
     */
    @Override
    public void endDocument() throws SAXException {
        super.endDocument();
    }

    /** Opens a new record when the record element starts and begins collecting element text. */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);
        if (qName.equalsIgnoreCase(rootName)) {
            entity = newEntity(qName);
        }
        value.setLength(0);
        element = qName;
    }

    /**
     * Assigns the collected text to the matching field of the open record and, when the record
     * element itself closes, stores the record.
     */
    @Override
    public void endElement(String uri, String localName, String qName) {
        if (element != null && entity != null) {
            initEntityField(value.toString());
        }
        if (qName.equalsIgnoreCase(rootName)) {
            // A record whose instantiation failed is skipped instead of being stored as null.
            if (entity != null) {
                list.add(entity);
            }
            // Close the record so elements outside it cannot modify an already stored object.
            entity = null;
        }
        element = null;
    }

    /** Accumulates element text; one element's text may arrive split across several calls. */
    @Override
    public void characters(char[] ch, int start, int length) {
        value.append(ch, start, length);
    }

    /**
     * Instantiates the target class through its no-argument constructor.
     *
     * @param qName name of the element that triggered the instantiation (used in the message)
     * @return the new instance, or {@code null} if it could not be created
     */
    private Object newEntity(String qName) {
        try {
            return clazz.getDeclaredConstructor().newInstance();
        } catch (Exception e) {
            // Constructor failures arrive wrapped; report the underlying cause when there is one.
            Throwable cause = e.getCause() != null ? e.getCause() : e;
            System.out.println("init new entity error:" + clazz.getName() + "\t" + qName + "\t" + cause.getMessage());
            return null;
        }
    }

    /**
     * Finds the first declared field whose name equals {@code name} ignoring case.
     *
     * @return the field, or {@code null} when the target class has no such field
     */
    private Field findField(String name) {
        for (Field candidate : fieldList) {
            if (candidate.getName().equalsIgnoreCase(name)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Assigns a value to the field of the current record that matches the current element.
     * Elements without a matching field are ignored; conversion or access failures are reported
     * on standard output and parsing continues.
     *
     * @param elementValue text of the current element
     */
    private void initEntityField(String elementValue) {
        Field field = findField(element);
        if (field == null) {
            return;
        }
        try {
            Object converted = getValidValue(field, elementValue);
            if (converted == null && field.getType().isPrimitive()) {
                // A primitive cannot hold null; leave the field at its current value.
                return;
            }
            field.setAccessible(true);
            field.set(entity, converted);
        } catch (Exception e) {
            System.out.println("set entity`s value error:" + clazz.getName() + "\t" + element + "\t" + e.getMessage());
        }
    }

    /**
     * Converts element text to a value suitable for {@code field}.
     *
     * <p>{@code String} fields receive the text unchanged and {@code java.util.Date} fields are
     * delegated to {@code DateUtil.parseDate}. Numeric and boolean fields (primitive or boxed) are
     * parsed from the trimmed text; blank text and the literal {@code null} yield {@code null}.
     * Any other field type yields {@code null}.
     *
     * @param field   field that will receive the value
     * @param content raw element text
     * @return the converted value, or {@code null} when there is nothing to assign
     * @throws NumberFormatException if the text is not a valid number for a numeric field
     */
    private static Object getValidValue(Field field, String content) {
        String typeName = field.getType().getName();
        if ("java.lang.String".equals(typeName)) {
            return content;
        }
        if ("java.util.Date".equals(typeName)) {
            return DateUtil.parseDate(content);
        }
        String text = content == null ? "" : content.trim();
        if (text.isEmpty() || "null".equals(text)) {
            return null;
        }
        switch (typeName) {
            case "int":
            case "java.lang.Integer":
                return Integer.valueOf(text);
            case "long":
            case "java.lang.Long":
                return Long.valueOf(text);
            case "short":
            case "java.lang.Short":
                return Short.valueOf(text);
            case "byte":
            case "java.lang.Byte":
                return Byte.valueOf(text);
            case "double":
            case "java.lang.Double":
                return Double.valueOf(text);
            case "float":
            case "java.lang.Float":
                return Float.valueOf(text);
            case "boolean":
            case "java.lang.Boolean":
                return Boolean.valueOf(text);
            default:
                return null;
        }
    }

    /**
     * Returns the records collected during the most recent parse.
     *
     * @return the handler's own result list (not a copy); empty if nothing has been parsed yet
     */
    public List<Object> getList() {
        return list;
    }
}
第二段代码:封装上述处理器的工具类 XmlUtil。注意其中的 readXml 方法才是对上述处理器的封装。
package com.kernel.util;
import com.kernel.entity.User;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.input.SAXBuilder;
/**
 * XML helpers: writes an object's fields as JDOM child elements and reads record lists back
 * from a file with the streaming {@code MyXmlHandler}.
 */
public class XmlUtil {
    /**
     * Generates XML: appends one child element per declared field of {@code obj} to
     * {@code element}. The child's name is the upper-cased field name. {@code Date} values are
     * formatted as {@code yyyy-MM-dd HH:mm:ss}; every other value is written with
     * {@link String#valueOf(Object)}, so a {@code null} field is written as the text {@code null}.
     *
     * @param element parent element that receives the generated children
     * @param obj     object instance whose declared fields are written
     */
    public static void initCreateXML(Element element, Object obj) {
        // Reflect over the object's class to obtain all declared fields.
        for (Field f : obj.getClass().getDeclaredFields()) {
            try {
                f.setAccessible(true);
                // Locale.ROOT keeps element names stable regardless of the JVM's default locale.
                Element child = new Element(f.getName().toUpperCase(Locale.ROOT));
                Object fieldValue = f.get(obj);
                String text;
                if (fieldValue instanceof Date) {
                    // DateFormatUtils comes from commons-lang3.
                    text = DateFormatUtils.format((Date) fieldValue, "yyyy-MM-dd HH:mm:ss");
                } else {
                    text = String.valueOf(fieldValue);
                }
                child.setText(text);
                element.addContent(child);
            } catch (IllegalAccessException e1) {
                e1.printStackTrace();
            }
        }
    }

    /**
     * Parses an XML file, using the simple class name of {@code entity} as the record element.
     *
     * @param file   XML file to read
     * @param entity sample object whose class is instantiated for each record
     * @return the parsed records; empty if parsing failed
     */
    public static List<Object> readXml(File file, Object entity) {
        return readXml(file, entity, null);
    }

    /**
     * Parses an XML file with a SAX parser; element names are matched against the field names of
     * the entity's class. Streaming keeps memory use low compared with loading the whole document
     * through JDOM2.
     *
     * <p>Any exception raised while configuring the parser or parsing is printed and swallowed;
     * the method then returns an empty list.
     *
     * <p>NOTE(review): the parser is created with validation enabled and without disabling
     * external entities. Confirm this is intended before feeding it files from untrusted sources.
     *
     * @param file     XML file to read
     * @param entity   sample object whose class is instantiated for each record
     * @param rootName name of the element that delimits one record; {@code null} selects the
     *                 simple class name of {@code entity}
     * @return a new list holding the parsed records; empty if parsing failed
     */
    public static List<Object> readXml(File file, Object entity, String rootName) {
        List<Object> list = new ArrayList<>();
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setNamespaceAware(true);
            factory.setValidating(true);
            SAXParser parser = factory.newSAXParser();
            MyXmlHandler handler = rootName == null
                    ? new MyXmlHandler(entity)
                    : new MyXmlHandler(entity, rootName);
            parser.parse(file, handler);
            list.addAll(handler.getList());
        } catch (Exception e) {
            e.printStackTrace();
        }
        return list;
    }

    /**
     * Loads the whole file with JDOM2 and prints the number of children of the root element.
     * Kept for comparison with {@link #readXml(File, Object)}.
     *
     * @param file XML file to read
     */
    public static void readXmlByJDOM(File file) {
        // try-with-resources closes the stream even when building the document fails.
        try (InputStream in = new FileInputStream(file)) {
            SAXBuilder saxBuilder = new SAXBuilder();
            Document doc = saxBuilder.build(in);
            Element root = doc.getRootElement();
            List<Element> elementList = root.getChildren();
            System.out.println(elementList.size());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Demo entry point: parses a user file and prints the number of records read.
     *
     * @param args optional; {@code args[0]} is the path of the XML file, otherwise the built-in
     *             sample path is used
     */
    public static void main(String[] args) {
        String path = args != null && args.length > 0 ? args[0] : "F:\\test\\fileTest\\user01.xml";
        File file = new File(path);
        // readXmlByJDOM(file);
        List<Object> users = readXml(file, new User());
        System.out.println(users.size());
    }
}
第三段代码:User 实体类,字段上的一些注解可以先不用管。
package com.kernel.entity;
import com.fasterxml.jackson.annotation.JsonFormat;
import org.springframework.context.annotation.PropertySource;
import org.springframework.format.annotation.DateTimeFormat;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
import javax.persistence.Table;
import java.io.Serializable;
import java.util.Date;
// Mapping annotations below come from persistence-api.
/**
 * User record mapped to the {@code user} table.
 */
@Entity
@Table(name = "user")
public class User implements Serializable {

    @Id
    @Column(name = "id")
    private String id;

    @Column(name = "username")
    private String username;

    @Column(name = "gender")
    private String gender;

    @Column(name = "age")
    private int age;

    @Column(name = "phone")
    private String phone;

    @Column(name = "email")
    private String email;

    // Conversion from the front end to the back end.
    @Column(name = "birth")
    @DateTimeFormat(pattern = "yyyy-MM-dd")
    private Date birth;

    @Column(name = "addtime")
    private Date addtime;

    @Column(name = "role")
    private String role;

    public String aq;

    private static String ad;

    /** Creates a user with every field left at its default value. */
    public User() {
    }

    /** Creates a user with all mapped columns supplied. */
    public User(String id, String username, String gender, int age, String phone, String email, Date birth, Date addtime, String role) {
        this.id = id;
        this.username = username;
        this.gender = gender;
        this.age = age;
        this.phone = phone;
        this.email = email;
        this.birth = birth;
        this.addtime = addtime;
        this.role = role;
    }

    /** @return the identifier */
    public String getId() {
        return this.id;
    }

    /** @param id the identifier */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the user name */
    public String getUsername() {
        return this.username;
    }

    /** @param username the user name */
    public void setUsername(String username) {
        this.username = username;
    }

    /** @return the gender */
    public String getGender() {
        return this.gender;
    }

    /** @param gender the gender */
    public void setGender(String gender) {
        this.gender = gender;
    }

    /** @return the age */
    public int getAge() {
        return this.age;
    }

    /** @param age the age */
    public void setAge(int age) {
        this.age = age;
    }

    /** @return the phone number */
    public String getPhone() {
        return this.phone;
    }

    /** @param phone the phone number */
    public void setPhone(String phone) {
        this.phone = phone;
    }

    /** @return the e-mail address */
    public String getEmail() {
        return this.email;
    }

    /** @param email the e-mail address */
    public void setEmail(String email) {
        this.email = email;
    }

    /**
     * Conversion from the back end to the front end.
     *
     * @return the birth date
     */
    @JsonFormat(pattern = "yyyy-MM-dd", timezone = "GMT+8")
    public Date getBirth() {
        return this.birth;
    }

    /** @param birth the birth date */
    public void setBirth(Date birth) {
        this.birth = birth;
    }

    /** @return the time the record was added */
    public Date getAddtime() {
        return this.addtime;
    }

    /** @param addtime the time the record was added */
    public void setAddtime(Date addtime) {
        this.addtime = addtime;
    }

    /** @return the role */
    public String getRole() {
        return this.role;
    }

    /** @param role the role */
    public void setRole(String role) {
        this.role = role;
    }
}
测试用的 XML 文件模板,为节省篇幅,这里只展示两条记录:
<?xml version="1.0" encoding="utf-8"?>
<USERS version="1.0">
<user>
<ID>a0</ID>
<USERNAME>jack0c5e5e8e-64f5-4346-9c3f-83aad186c7d3</USERNAME>
<GENDER>m</GENDER>
<AGE>12</AGE>
<PHONE>110</PHONE>
<EMAIL>86da9724-70a5-443f-a9f8-5cf86f7c4cec110@com</EMAIL>
<BIRTH>2019-06-02 14:51:16</BIRTH>
<ADDTIME>2019-06-02 14:51:16</ADDTIME>
<ROLE>员工0</ROLE>
<AQ>null</AQ>
<AD>null</AD>
</user>
<user>
<ID>a1</ID>
<USERNAME>jack9bdc3093-efa6-46ad-b877-da3c256f5b6a</USERNAME>
<GENDER>m</GENDER>
<AGE>12</AGE>
<PHONE>110</PHONE>
<EMAIL>c0d1e737-2cec-41d5-8629-b5db44ccc640110@com</EMAIL>
<BIRTH>2019-06-02 14:51:17</BIRTH>
<ADDTIME>2019-06-02 14:51:17</ADDTIME>
<ROLE>员工1</ROLE>
<AQ>null</AQ>
<AD>null</AD>
</user>
</USERS>