XML(eXtensible Markup Language):可扩展标记语言。
解析XML在java中有两大类,四种解析方式:
一、DOM(Document Object Model)解析:
为 XML 文档的已解析版本定义了一组接口。解析器读入整个文档,然后构建一个驻留内存的树结构, 然后代码就可以使用 DOM 接口来操作这个树结构。
优点:整个文档树在内存中,便于操作;支持删除、修改、重新排列等多种功能;
缺点:将整个文档调入内存(包括无用的节点),浪费时间和空间;
使用场合:一旦解析了文档还需多次访问这些数据;硬件资源充足(内存、CPU)。
/**
 * Demonstrates reading and writing the student XML file with the JDK's DOM (JAXP) API.
 */
public class DomDemo {

    /**
     * Parses {@code d:\student.xml} and converts every {@code <student>} element into a
     * {@link Student}.
     *
     * <p>The {@code stuid} and {@code stuname} attributes and the {@code <address>} and
     * {@code <inschool>} (pattern {@code yyyy-MM-dd}) child elements are copied into the
     * object. Any failure is printed to standard error and the students read so far are
     * returned.
     *
     * @return the students parsed from the file; never {@code null}, possibly empty
     */
    public static List<Student> generateStudentFromXml() {
        List<Student> stus = new ArrayList<Student>();
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // One formatter per call is enough; it is never shared between threads.
        SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
        try {
            DocumentBuilder db = factory.newDocumentBuilder();
            // Load the whole file into an in-memory document tree.
            Document document = db.parse(new File("d:\\student.xml"));
            NodeList nl = document.getElementsByTagName("student");
            System.out.println(nl.getLength());
            for (int index = 0; index < nl.getLength(); index++) {
                System.out.println("student 子节点。。。。。。。");
                Student stu = new Student();
                Node node = nl.item(index);
                // Check for attributes, not child nodes: an empty <student .../> element
                // has attributes but no children and must still be read.
                if (node.hasAttributes()) {
                    NamedNodeMap attributes = node.getAttributes();
                    for (int i = 0; i < attributes.getLength(); i++) {
                        Node attr = attributes.item(i);
                        if (attr.getNodeName().equals("stuid")) {
                            stu.setStuid(Integer.parseInt(attr.getNodeValue()));
                        }
                        if (attr.getNodeName().equals("stuname")) {
                            stu.setStuname(attr.getNodeValue());
                        }
                    }
                }
                NodeList childList = node.getChildNodes();
                for (int i = 0; i < childList.getLength(); i++) {
                    Node childNode = childList.item(i);
                    // Skip whitespace text nodes and comments between the child elements.
                    if (childNode instanceof Element) {
                        String childNodeName = childNode.getNodeName();
                        String childNodeValue = childNode.getTextContent();
                        if (childNodeName.equals("address")) {
                            stu.setAddress(childNodeValue);
                        }
                        if (childNodeName.equals("inschool")) {
                            stu.setInschool(dayFormat.parse(childNodeValue));
                        }
                    }
                }
                stus.add(stu);
            }
        } catch (NumberFormatException e) {
            e.printStackTrace();
        } catch (DOMException e) {
            e.printStackTrace();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
        return stus;
    }

    /**
     * Builds a DOM tree for a fixed set of sample students and writes it, indented, to
     * {@code d:/student.xml}.
     *
     * <p>The document has a {@code <students>} root with one {@code <student>} element per
     * student, carrying {@code stuid}/{@code stuname} attributes and {@code <address>} /
     * {@code <inschool>} child elements. Failures are printed to standard error.
     */
    public static void createXMLFile() {
        List<Student> list = buildSampleStudents();
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document doc = builder.newDocument();
            SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");

            Element students = doc.createElement("students");
            for (Student stu : list) {
                Element student = doc.createElement("student");
                student.setAttribute("stuid", String.valueOf(stu.getStuid()));
                student.setAttribute("stuname", stu.getStuname());

                Element address = doc.createElement("address");
                address.appendChild(doc.createTextNode(stu.getAddress()));
                student.appendChild(address);

                Element inschool = doc.createElement("inschool");
                inschool.appendChild(doc.createTextNode(dayFormat.format(stu.getInschool())));
                student.appendChild(inschool);

                students.appendChild(student);
            }
            // Attach the populated root element to the document.
            doc.appendChild(students);

            // Serialize the in-memory tree to disk with line breaks between elements.
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.transform(new DOMSource(doc), new StreamResult(new File("d:/student.xml")));
        } catch (Exception e) {
            // Deliberately broad: this demo entry point has never propagated exceptions.
            e.printStackTrace();
        }
    }

    /** Creates the hard-coded sample students that {@link #createXMLFile()} writes out. */
    private static List<Student> buildSampleStudents() {
        List<Student> list = new ArrayList<Student>();
        SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
        try {
            list.add(new Student(222, "aaa", "Beijinghaidian", dayFormat.parse("2012-4-14")));
            list.add(new Student(333, "bbb", "Beijingxicheng", dayFormat.parse("2012-4-24")));
            list.add(new Student(444, "ccc", "BeijingDongcheng", dayFormat.parse("2012-4-22")));
            list.add(new Student(555, "ddd", "BeingjingXuanwu", dayFormat.parse("2012-4-23")));
            list.add(new Student(666, "eee", "BeingjingChaoyang", dayFormat.parse("2012-4-24")));
        } catch (ParseException e) {
            e.printStackTrace();
        }
        return list;
    }
}
二、SAX(Simple API for XML)解析
事件驱动。当解析器发现元素开始、元素结束、文本、文档的开始或结束等时,发送事件,程序员编写响应这些事件的代码,保存数据。
优点:不用事先调入整个文档,占用资源少;SAX解析器代码比DOM解析器代码小,便于下载,适于Applet(小应用程序)、手机等移动终端。
缺点:不是持久的;事件过后,若没保存数据,那么数据就丢了;
无状态性;从事件中只能得到文本,但不知该文本属于哪个元素;
使用场合:Applet;只需XML文档的少量内容,很少回头访问;机器内存少;
/**
 * SAX handler that collects the {@code <student>} elements of a document into
 * {@link Student} objects.
 *
 * <p>Instances keep parsing state in fields and are therefore meant to be used by one
 * thread at a time.
 */
public class SAXDemo extends DefaultHandler {
    /** Students collected during the current (or most recent) parse. */
    private List<Student> stus;
    /** The student whose element is currently open, if any. */
    private Student student;
    /** Name of the element whose text is currently being read; null between elements. */
    private String tagName;
    /** Text collected for the current element; the parser may deliver it in several chunks. */
    private final StringBuilder text = new StringBuilder();
    /** Date pattern used by the {@code <inschool>} element. */
    private final SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");

    /** Resets all parsing state at the start of a document. */
    @Override
    public void startDocument() throws SAXException {
        stus = new ArrayList<Student>();
        student = null;
        tagName = null;
        text.setLength(0);
    }

    /**
     * Parses the given XML stream with this handler.
     *
     * <p>Parser configuration, SAX and I/O failures are printed to standard error; the
     * students read up to that point are returned.
     *
     * @param is the XML input to read
     * @return the students found; never {@code null}, possibly empty
     */
    public List<Student> getAllStudent(InputStream is) {
        // Start from a fresh list so a failure before startDocument() can neither return
        // null nor leak the result of a previous parse.
        stus = new ArrayList<Student>();
        try {
            SAXParserFactory spf = SAXParserFactory.newInstance();
            SAXParser sp = spf.newSAXParser();
            // Events are dispatched back to this handler.
            sp.parse(is, this);
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return stus;
    }

    /**
     * Opens a new {@link Student} for every {@code <student>} element and remembers the
     * element name so that following text can be attributed to it.
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        System.out.println("start........." + qName);
        // Text seen before a nested element starts belongs to the enclosing element.
        flushText();
        if (qName.equals("student")) {
            student = new Student();
            // Look attributes up by name: their order in the document is not guaranteed.
            student.setStuid(Integer.parseInt(attributes.getValue("stuid")));
            student.setStuname(attributes.getValue("stuname"));
        }
        tagName = qName;
    }

    /** Stores any pending text and, on {@code </student>}, adds the finished student. */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        flushText();
        if (qName.equals("student")) {
            stus.add(student);
        }
        tagName = null;
    }

    /**
     * Buffers the text of {@code <address>} and {@code <inschool>} elements.
     *
     * <p>A parser is allowed to report one run of text through several calls, so the value
     * is only stored once the element boundary is reached.
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (tagName != null) {
            System.out.println("元素节点的textContent......." + tagName);
            if (tagName.equals("address") || tagName.equals("inschool")) {
                text.append(ch, start, length);
            }
        }
    }

    /** Copies the buffered text into the current student and clears the buffer. */
    private void flushText() {
        if (tagName != null && student != null && text.length() > 0) {
            String value = text.toString();
            if (tagName.equals("address")) {
                student.setAddress(value);
            } else if (tagName.equals("inschool")) {
                try {
                    student.setInschool(dayFormat.parse(value));
                } catch (ParseException e) {
                    // An unparsable date leaves the field unset; parsing continues.
                    e.printStackTrace();
                }
            }
        }
        text.setLength(0);
    }
}
三、JDOM解析
为减少DOM、SAX的编码量,出现了JDOM, JDOM在DOM的基础上降低了复杂度,易用性强。
优点:极大减少了代码量。
使用场合:要实现的功能简单,如解析、创建等.
/**
 * Demonstrates writing and reading the student XML file with JDOM.
 */
public class JDomDemo {

    /**
     * Writes the given students to an XML file.
     *
     * <p>The document has a {@code <students>} root with one {@code <student>} element per
     * entry, carrying {@code stuid}/{@code stuname} attributes and {@code <address>} /
     * {@code <inschool>} (pattern {@code yyyy-MM-dd}) child elements. I/O failures are
     * printed to standard error.
     *
     * @param fileName path of the file to create or overwrite
     * @param stus the students to write
     */
    public void createXml(String fileName, List<Student> stus) {
        SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
        Element root = new Element("students");
        Document document = new Document(root);
        for (Student stu : stus) {
            Element student = new Element("student");
            student.setAttribute("stuid", String.valueOf(stu.getStuid()));
            student.setAttribute("stuname", stu.getStuname());

            Element address = new Element("address");
            address.setText(stu.getAddress());
            student.addContent(address);

            Element inschool = new Element("inschool");
            inschool.setText(dayFormat.format(stu.getInschool()));
            student.addContent(inschool);

            root.addContent(student);
        }

        FileOutputStream out = null;
        try {
            out = new FileOutputStream(fileName);
            new XMLOutputter().output(document, out);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always release the file handle, even when serialization fails.
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Reads the students stored in an XML file.
     *
     * <p>Failures are printed to standard error and the students read so far are returned.
     *
     * @param fileName path of the XML file to read
     * @return the students found; never {@code null}, possibly empty
     */
    public List<Student> parserXml(String fileName) {
        List<Student> stus = new ArrayList<Student>();
        SAXBuilder builder = new SAXBuilder(false);
        SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
        try {
            // build(String) would interpret the argument as a URI/system id; wrapping it in
            // a File makes the parameter a plain file path, matching createXml.
            Document document = builder.build(new File(fileName));
            Element students = document.getRootElement();
            List<Element> studentList = students.getChildren("student");
            for (Element student : studentList) {
                Student stu = new Student();
                stu.setStuid(Integer.parseInt(student.getAttributeValue("stuid")));
                stu.setStuname(student.getAttributeValue("stuname"));
                List<Element> studentInfo = student.getChildren();
                for (Element info : studentInfo) {
                    String tagName = info.getName();
                    String value = info.getValue();
                    if (tagName.equals("address")) {
                        stu.setAddress(value);
                    }
                    if (tagName.equals("inschool")) {
                        stu.setInschool(dayFormat.parse(value));
                    }
                }
                stus.add(stu);
            }
        } catch (JDOMException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (Exception e) {
            // Covers malformed ids and dates; this method has never propagated exceptions.
            e.printStackTrace();
        }
        return stus;
    }
}
四、DOM4J(DOM for Java)
DOM4J 结合了SAX和 JDOM的优点,是一个非常优秀的Java XML API,具有性能优异、功能强大和极其易用的特点,灵活,效率高,同时它也是一个开放源代码的软件。如今你可以看到越来越多的 Java 软件都在使用 DOM4J 来读写 XML,特别值得一提的是连 Sun 的 JAXM 也在用DOM4J。
/**
 * Demonstrates reading and writing the student XML file with dom4j.
 */
public class Dom4jDemo {

    /**
     * Reads the {@code <student>} children of the document's root element.
     *
     * <p>Child elements with any other name are skipped. A student without an
     * {@code <address>} element gets a {@code null} address. Failures are printed to
     * standard error and the students read so far are returned.
     *
     * @param path path of the XML file to read
     * @return the students found; never {@code null}, possibly empty
     */
    public static List<Student> parseXmlByDom4j(String path) {
        List<Student> stus = new ArrayList<Student>();
        File file = new File(path);
        SAXReader reader = new SAXReader();
        SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
        try {
            Document document = reader.read(file);
            Element rootEle = document.getRootElement();
            List<Element> list = rootEle.elements();
            Iterator<Element> it = list.iterator();
            while (it.hasNext()) {
                Element e = it.next();
                if (!e.getName().equals("student")) {
                    // Not a student record; nothing to extract.
                    continue;
                }
                int stuid = Integer.parseInt(e.attributeValue("stuid"));
                String stuname = e.attributeValue("stuname");
                // The parser has already decoded the text using the document's declared
                // encoding; re-encoding it here would corrupt non-ASCII characters.
                String address = e.elementText("address");
                Date inschool = dayFormat.parse(e.elementText("inschool"));
                stus.add(new Student(stuid, stuname, address, inschool));
            }
        } catch (DocumentException e) {
            e.printStackTrace();
        } catch (Exception e) {
            // Covers malformed ids and dates; this method has never propagated exceptions.
            e.printStackTrace();
        }
        return stus;
    }

    /**
     * Writes the given students to an XML file.
     *
     * <p>The document has a {@code <students>} root with one {@code <student>} element per
     * entry, carrying {@code stuid}/{@code stuname} attributes and {@code <address>} /
     * {@code <inschool>} (pattern {@code yyyy-MM-dd}) child elements. I/O failures are
     * printed to standard error.
     *
     * @param stus the students to write
     * @param fileName path of the file to create or overwrite
     */
    public static void createXml(List<Student> stus, String fileName) {
        Document document = DocumentHelper.createDocument();
        Element students = document.addElement("students");
        SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
        for (Student stu : stus) {
            Element student = students.addElement("student");
            student.addAttribute("stuid", String.valueOf(stu.getStuid()));
            student.addAttribute("stuname", stu.getStuname());
            Element address = student.addElement("address");
            address.setText(stu.getAddress());
            Element inschool = student.addElement("inschool");
            inschool.setText(dayFormat.format(stu.getInschool()));
        }

        FileOutputStream out = null;
        try {
            out = new FileOutputStream(fileName);
            // Writing through a byte stream lets XMLWriter encode the characters with the
            // encoding named in the XML declaration instead of the platform default.
            XMLWriter xmlWriter = new XMLWriter(out);
            xmlWriter.write(document);
            xmlWriter.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always release the file handle, even when writing fails.
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}