# Java API for XML Processing (JAXP)
# Study Note of JAVA Tutorial
Simple API for XML APIs
java xml parsing tutorial: https://docs.oracle.com/javase/tutorial/jaxp/sax/
SAX 采用事件模型来解析 XML 文档,是解析 XML 文档的一种更快速、更轻量的方法。 利用 SAX 可以对 XML 文档进行有选择的解析和访问,而不必像 DOM 那样加载整个文档,因此它对内存的要求较低。 但 SAX 对 XML 文档的解析为一次性读取,不创建任何文档对象,很难同时访问文档中的多处数据。
SAX 解析器接口和事件处理器接口定义在 org.xml.sax 包中。主要的接口包括 ContentHandler、DTDHandler、EntityResolver 及 ErrorHandler。 其中 ContentHandler 是主要的处理器接口,用于处理基本的文档解析事件;DTDHandler 和 EntityResolver 接口用于处理与 DTD 验证和实体解析相关的事件; ErrorHandler 是基本的错误处理接口。DefaultHandler 类实现了上述四个事件处理接口。下面的例子中 MyXmlHandler 继承了 DefaultHandler 类, 并覆盖了其中的五个回调方法 startDocument()、endDocument()、startElement()、endElement() 及 characters() 以加入自己的事件处理逻辑。
![](https://images.cnblogs.com/OutliningIndicators/ContractedBlock.gif)
![](https://images.cnblogs.com/OutliningIndicators/ExpandedBlockStart.gif)
package sax; import java.util.HashMap; import java.util.Iterator; import java.util.Set; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import org.xml.sax.helpers.DefaultHandler; public class MyXmlHandler extends DefaultHandler{ /* * to implements ContentHandler */ private HashMap<String, Integer> tags; private String tag; // Parser calls this once before parsing a document @Override public void startDocument() throws SAXException { tags = new HashMap<String, Integer>(); System.out.println("xml sax decoding begin ..."); } /** * Start processing of an element. * @param namespaceURI Namespace_URI * @param localName The local name, without prefix * @param qName The qualified name, with prefix * @param atts The attributes of the element */ // Parser calls this for each element in a document @Override public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { String key = qName; tag = qName; System.out.println("\nanalyse element begin:"+qName); Object value = tags.get(key); if (value == null) { // Add a new entry tags.put(key, new Integer(1)); } else { // Get the current count and increment it int count = ((Integer)value).intValue(); count++; tags.put(key, new Integer(count)); } } @Override public void characters(char[] ch, int start, int length) { // Processing character data inside an element if (ch != null && start >0 && length >0 ){ System.out.println("+++++++++++++++++++++++++++++++++++++"); System.out.println("content begin<"+new String(ch,start,length)+">end"); System.out.println("+++++++++++++++++++++++++++++++++++++"); } } // Parser calls this for each element in a document @Override public void endElement(String namespaceURI, String localName, String qName) throws SAXException { tag = null; // End of processing current element System.out.println("End analysing element:"+qName+"\n"); } // Parser calls this once after parsing a document @Override 
public void endDocument() throws SAXException { System.out.println("xml sax decoding end, decode results are as follow:"); Set<String> e = tags.keySet(); for (Iterator<String> iter = e.iterator(); iter.hasNext();) { String tag = iter.next(); int count = ((Integer)tags.get(tag)).intValue(); System.out.println("Local Name \"" + tag + "\" occurs " + count + " times"); } } /* * to implements Validation && ErrorHandler */ //Returns a string describing parse exception details private String getParseExceptionInfo(SAXParseException spe) { String systemId = spe.getSystemId(); if (systemId == null) { systemId = "null"; } String info = "URI=" + systemId + " Line=" + spe.getLineNumber() + ": " + spe.getMessage(); return info; } // The following methods are standard SAX ErrorHandler methods. @Override public void warning(SAXParseException spe) throws SAXException { System.out.println("Warning: " + getParseExceptionInfo(spe)); } @Override public void error(SAXParseException spe) throws SAXException { String message = "Error: " + getParseExceptionInfo(spe); throw new SAXException(message); } @Override public void fatalError(SAXParseException spe) throws SAXException { String message = "Fatal Error: " + getParseExceptionInfo(spe); throw new SAXException(message); } }
SAXParserFactory
SAXParserFactory类的对象根据不同的参数构建parser类的实例
SAXParser
SAXParser 类定义了多个重载的 parse() 方法。通常情况下,将一个 XML 数据源和一个 DefaultHandler 对象传递给 parser,parser 会处理 XML 数据并调用 DefaultHandler 中对应的回调方法。
XMLReader
SAXParser 封装了一个 XMLReader,可以通过 SAXParser 的 getXMLReader() 方法获取并配置它。真正与你预先定义的 SAX 事件处理方法交互的正是这个 XMLReader。
DefaultHandler
DefaultHandler 实现了 ContentHandler、ErrorHandler、DTDHandler 和 EntityResolver 接口(方法体均为空),所以你可以只重写那些你感兴趣的方法。
ContentHandler
当解析器识别出 XML 标签时,startDocument()、endDocument()、startElement() 和 endElement() 等方法会被调用。当解析器遇到 XML 元素中的文本或内联处理指令时,会分别调用 characters() 和 processingInstruction()。
ErrorHandler
默认的错误处理程序只对致命错误(fatal error)抛出异常,并忽略其他错误(包括验证错误)。为了确保正确的处理,需要提供自己的错误处理方法。
![](https://images.cnblogs.com/OutliningIndicators/ContractedBlock.gif)
![](https://images.cnblogs.com/OutliningIndicators/ExpandedBlockStart.gif)
package sax; import java.io.File; import java.io.IOException; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.xml.sax.SAXException; import org.xml.sax.SAXNotRecognizedException; import org.xml.sax.XMLReader; public class saxparser { // add for XML schema static final String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage"; static final String W3C_XML_SCHEMA = "http://www.w3.org/2001/XMLSchema"; static final String JAXP_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource"; public static void main(String[] args) throws SAXException, IOException, ParserConfigurationException{ /* * parameters for parser * filename : XML file name * dtdValidate : if true, do DTD validation * xsdValidate : if true, do W3C XML Schema validation * schemaSource: schema source file */ String filename = "/sandbox/xmltest/book.xml"; boolean dtdValidate = false; boolean xsdValidate = false; String schemaSource = null; SAXParserFactory saxparserfactory = SAXParserFactory.newInstance(); /* * Set namespaceAware to true to get a parser that corresponds to * the default SAX2 NameSpace feature setting. This is necessary * because the default value from JAXP 1.0 was defined to be false. */ saxparserfactory.setNamespaceAware(false); // Validation part 1: set whether validation is on(true) saxparserfactory.setValidating(dtdValidate || xsdValidate); // Create a JAXP SAXParser SAXParser parser = saxparserfactory.newSAXParser(); // Validation part 2a: set the schema language if necessary if (xsdValidate) { try { parser.setProperty(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA); } catch (SAXNotRecognizedException x) { // This can happen if the parser does not support JAXP 1.2 System.err.println( "Error: JAXP SAXParser property not recognized: " + JAXP_SCHEMA_LANGUAGE); System.exit(1); } } //Validation part 2b: Set the schema source. 
if (schemaSource != null) { parser.setProperty(JAXP_SCHEMA_SOURCE, new File(schemaSource)); } // parse the XML document using xmlHandler MyXmlHandler MyHandler = new MyXmlHandler(); // Get the encapsulated SAX XMLReader XMLReader xmlReader = parser.getXMLReader(); // Set the ContentHandler of the XMLReader xmlReader.setContentHandler(MyHandler); // Set an ErrorHandler before parsing xmlReader.setErrorHandler(MyHandler); // Tell the XMLReader to parse the XML document xmlReader.parse(filename); } }
测试XML 文件:
<?xml version="1.0" encoding="UTF-8"?> <books> <book id="001"> <title>Harry Potter</title> <author>J K. Rowling</author> </book> <book id="002"> <title>Learning XML</title> <author>Erik T. Ray</author> </book> </books>
OUTPUT :
![](https://images.cnblogs.com/OutliningIndicators/ContractedBlock.gif)
![](https://images.cnblogs.com/OutliningIndicators/ExpandedBlockStart.gif)
xml sax decoding begin ... analyse element begin:books +++++++++++++++++++++++++++++++++++++ content begin< >end +++++++++++++++++++++++++++++++++++++ analyse element begin:book +++++++++++++++++++++++++++++++++++++ content begin< >end +++++++++++++++++++++++++++++++++++++ analyse element begin:title +++++++++++++++++++++++++++++++++++++ content begin<Harry Potter>end +++++++++++++++++++++++++++++++++++++ End analysing element:title +++++++++++++++++++++++++++++++++++++ content begin< >end +++++++++++++++++++++++++++++++++++++ analyse element begin:author +++++++++++++++++++++++++++++++++++++ content begin<J K. Rowling>end +++++++++++++++++++++++++++++++++++++ End analysing element:author +++++++++++++++++++++++++++++++++++++ content begin< >end +++++++++++++++++++++++++++++++++++++ End analysing element:book +++++++++++++++++++++++++++++++++++++ content begin< >end +++++++++++++++++++++++++++++++++++++ analyse element begin:book +++++++++++++++++++++++++++++++++++++ content begin< >end +++++++++++++++++++++++++++++++++++++ analyse element begin:title +++++++++++++++++++++++++++++++++++++ content begin<Learning XML>end +++++++++++++++++++++++++++++++++++++ End analysing element:title +++++++++++++++++++++++++++++++++++++ content begin< >end +++++++++++++++++++++++++++++++++++++ analyse element begin:author +++++++++++++++++++++++++++++++++++++ content begin<Erik T. Ray>end +++++++++++++++++++++++++++++++++++++ End analysing element:author +++++++++++++++++++++++++++++++++++++ content begin< >end +++++++++++++++++++++++++++++++++++++ End analysing element:book +++++++++++++++++++++++++++++++++++++ content begin< >end +++++++++++++++++++++++++++++++++++++ End analysing element:books xml sax decoding end, decode results are as follow: Local Name "books" occurs 1 times Local Name "author" occurs 2 times Local Name "book" occurs 2 times Local Name "title" occurs 2 times