DTD和schema小总结

最新推荐文章于 2024-09-14 17:02:52 发布

weixin_34122548

最新推荐文章于 2024-09-14 17:02:52 发布

阅读量118

点赞数

文章标签：人工智能 python

原文链接：https://my.oschina.net/heroShane/blog/204145

版权

2019独角兽企业重金招聘Python工程师标准>>>

一、DTD简单介绍

上面是DTD直接写在xml文件内部的情况。如果把DTD分离成独立的文件，该怎么设置呢？（差异不大，在xml中通过 <!DOCTYPE 根元素 SYSTEM "DTD文件路径"> 引入即可）

DTD文件：

<?xml version="1.0" encoding="UTF-8"?>
<!-- Root element: one or more person entries. -->
<!ELEMENT persons (person+)>
<!-- A person holds sex, name and age in exactly this order, followed by one or more messages containers. -->
<!ELEMENT person (sex,name,age,messages+)>
<!-- Every person must carry an id attribute; type ID means the value has to be unique within the document. -->
<!ATTLIST person id ID #REQUIRED>
<!-- Text-only leaf elements of person. -->
<!ELEMENT sex (#PCDATA)>
<!ELEMENT name (#PCDATA)>
<!ELEMENT age (#PCDATA)>
<!-- A messages container holds one or more message entries. -->
<!ELEMENT messages (message+)>
<!-- A message is a title followed by its content, both text-only. -->
<!ELEMENT message (title,content)>
<!ELEMENT title (#PCDATA)>
<!ELEMENT content (#PCDATA)>

xml文件

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE persons SYSTEM "persons.dtd">
<!-- Skeleton instance for the external DTD named in the DOCTYPE: two persons whose text fields are all left empty. -->
<persons>
	<!-- First person; the id value is of DTD type ID, so it is a name (not a bare number) and unique. -->
	<person id="_1">
		<sex/>
		<name/>
		<age/>
		<messages>
			<message>
				<title/>
				<content/>
			</message>
		</messages>
	</person>
	<!-- Second person; empty elements use the self-closing form, which parses the same as a start/end tag pair. -->
	<person id="_2">
		<sex/>
		<name/>
		<age/>
		<messages>
			<message>
				<title/>
				<content/>
			</message>
		</messages>
	</person>
</persons>

二、schema入门

2.1 命名空间

在xml中引入schema

2.2 元素和属性

schema文件

<?xml version="1.0" encoding="UTF-8"?>
<!-- Schema for a list of books in the target namespace http://www.example.org/02. Every declaration is nested inside the single global element "books". -->
<schema xmlns="http://www.w3.org/2001/XMLSchema" targetNamespace="http://www.example.org/02"
	xmlns:tns="http://www.example.org/02" elementFormDefault="qualified">
	<element name="books">
		<complexType>
		<!-- maxOccurs sets the maximum number of occurrences; "unbounded" removes the upper limit -->
			<sequence maxOccurs="unbounded">
				<element name="book">
					<complexType>
						<!-- the group title, content, author-or-authors must appear at least once and may repeat -->
						<sequence minOccurs="1" maxOccurs="unbounded">
							<element name="title" type="string" />
							<element name="content" type="string" />
							<!-- exactly one of the two alternatives: a single author, or an authors wrapper -->
							<choice>
								<element name="author" type="string" />
								<element name="authors">
									<complexType>
										<all><!-- each element can appear only once -->
											<element name="author" type="string"/>
										</all>
									</complexType>
								</element>
							</choice>
						</sequence>
						<!-- every book must carry an integer id attribute -->
						<attribute name="id" type="int" use="required"/>
					</complexType>
				</element>
			</sequence>
		</complexType>
	</element>
</schema>

引用上面schema文件的xml文件

<?xml version="1.0" encoding="UTF-8"?>
<!--
	Instance document whose elements live in the namespace http://www.example.org/02.
	Because the schema declares that URI as its target namespace, it is attached with
	xsi:schemaLocation, whose value is a "namespace-URI schema-location" pair.
	xsi:noNamespaceSchemaLocation is only meant for schemas without a target namespace.
-->
<book:books xmlns:book="http://www.example.org/02"
	  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	  xsi:schemaLocation="http://www.example.org/02 02.xsd">
	<!-- A book with a single author element. -->
	<book:book id="1">
		<book:title>Java in action</book:title>
		<book:content>Java is good</book:content>
		<book:author>Bruce</book:author>
	</book:book>
	<!-- A book that uses the authors wrapper instead. -->
	<book:book id="2">
		<book:title>SOA in action</book:title>
		<book:content>soa is difficult</book:content>
		<book:authors>
			<book:author>Jike</book:author>
		</book:authors>
	</book:book>
</book:books>

2.3 相关的设计方案

1、Russian Doll【如上面的例子】

只有一个根元素，通过嵌套的方式完成编写

优点：结构清晰，根元素只有一个

缺点：元素无法重用

2、Salami Slice【所有元素都声明为全局元素，再通过ref引用组合起来，见下面的例子】

优点：能够进行最大化重用

缺点：根元素不清晰

<?xml version="1.0" encoding="UTF-8"?>
<!-- Salami Slice layout: every element is declared globally and assembled through ref. -->
<schema xmlns="http://www.w3.org/2001/XMLSchema"
		targetNamespace="http://www.example.org/03"
		xmlns:tns="http://www.example.org/03"
		elementFormDefault="qualified">

	<!-- Global leaf elements that other declarations can reference. -->
	<element name="id" type="int"/>
	<element name="title" type="string"/>
	<element name="content" type="string"/>

	<!-- A book is the ordered sequence id, title, content, each pulled in by reference. -->
	<complexType name="bookType">
		<sequence>
			<element ref="tns:id"/>
			<element ref="tns:title"/>
			<element ref="tns:content"/>
		</sequence>
	</complexType>

	<!-- Global book element typed by bookType. -->
	<element name="book" type="tns:bookType"/>
</schema>

3、Venetian Blind【以命名类型（complexType/simpleType）为主进行复用，元素通过type引用这些类型；2.4中的student.xsd、classroom.xsd就是这种写法】

2.4 schema之间的相互引用

student.xsd文件

<?xml version="1.0" encoding="UTF-8"?>
<!-- Student declarations in the namespace http://www.example.org/classroom, built from named types. -->
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" 
		targetNamespace="http://www.example.org/classroom" 
		xmlns:tns="http://www.example.org/classroom" 
		elementFormDefault="qualified">
	<!-- Global student element; its structure comes from the named type below. -->
	<xsd:element name="student" type="tns:studentType"/>
	
	<!-- A student is a name followed by a sex, in that order. -->
	<xsd:complexType name="studentType">
		<xsd:sequence>
			<xsd:element name="name" type="xsd:string"/>
			<xsd:element name="sex" type="tns:sexType"/>
		</xsd:sequence>
	</xsd:complexType>
	
	<!-- String restricted to the two enumerated values listed here. -->
	<xsd:simpleType name="sexType">
		<xsd:restriction base="xsd:string">
			<xsd:enumeration value="男"/>
			<xsd:enumeration value="女"/>
		</xsd:restriction>
	</xsd:simpleType>
</xsd:schema>

classroom.xsd文件

<?xml version="1.0" encoding="UTF-8"?>
<!-- Classroom declarations in the namespace http://www.example.org/classroom; student types are pulled in from student.xsd. -->
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" 
		targetNamespace="http://www.example.org/classroom" 
		xmlns:tns="http://www.example.org/classroom" 
		elementFormDefault="qualified">
	<!-- xsd:include merges another schema document into this one; it is used for documents of the same target namespace. -->
	<xsd:include schemaLocation="student.xsd"/>
	
	<!-- Global classroom element; its structure comes from the named type below. -->
	<xsd:element name="classroom" type="tns:classroomType"/>
	
	<!-- A classroom is a grade, a name, then one or more student elements. -->
	<xsd:complexType name="classroomType">
		<xsd:sequence>
			<xsd:element name="grade" type="tns:gradeType"/>
			<xsd:element name="name" type="xsd:string"/>
			<!-- Disabled alternative: wrap the students in a stus container element. -->
			<!--  <xsd:element name="stus">
				<xsd:complexType>
					<xsd:sequence minOccurs="1" maxOccurs="unbounded">
						<xsd:element name="student" type="tns:studentType"/>
					</xsd:sequence>
				</xsd:complexType>
			</xsd:element>
			-->
			<!-- Active variant: the student elements follow name directly, with no wrapper. -->
			<xsd:sequence minOccurs="1" maxOccurs="unbounded">
				<xsd:element name="student" type="tns:studentType"/>
			</xsd:sequence>
		</xsd:sequence>
	</xsd:complexType>
	
	<!-- Integer limited to the inclusive range 2000..3000. -->
	<xsd:simpleType name="gradeType">
		<xsd:restriction base="xsd:int">
			<xsd:minInclusive value="2000"/>
			<xsd:maxInclusive value="3000"/>
		</xsd:restriction>
	</xsd:simpleType>
</xsd:schema>

三、Stax处理xml

3.1 基于光标模型的查找

books.xml

<?xml version="1.0" encoding="UTF-8"?>
<!-- Sample bookstore catalogue: four book entries, each with a category attribute, a title, one or more authors, a year and a price. -->

<bookstore>

<book category="COOKING">
  <title>Everyday Italian</title>
  <author>Giada De Laurentiis</author>
  <year>2005</year>
  <price>30.00</price>
</book>

<book category="CHILDREN">
  <title>Harry Potter</title>
  <author>J K. Rowling</author>
  <year>2005</year>
  <price>29.99</price>
</book>

<book category="WEB">
  <title>XQuery Kick Start</title>
  <author>James McGovern</author>
  <author>Per Bothner</author>
  <author>Kurt Cagle</author>
  <author>James Linn</author>
  <author>Vaidyanathan Nagarajan</author>
  <year>2003</year>
  <price>49.99</price>
</book>

<book category="WEB">
  <title>Learning XML</title>
  <author>Erik T. Ray</author>
  <year>2003</year>
  <price>39.95</price>
</book>

</bookstore>

@Test
	public void test02() {
		XMLInputFactory factory = XMLInputFactory.newInstance();
		InputStream is = null;
		try {
			is = TestStax.class.getClassLoader().getResourceAsStream("books.xml");
			XMLStreamReader reader = factory.createXMLStreamReader(is);
			while(reader.hasNext()) {
				int type = reader.next();
				if(type==XMLStreamConstants.START_ELEMENT) {
					String name = reader.getName().toString();
					if(name.equals("book")) {
						System.out.println(reader.getAttributeName(0)+":"+reader.getAttributeValue(0));
					}
				}
			}
		} catch (XMLStreamException e) {
			e.printStackTrace();
		} finally {
			try {
				if(is!=null) is.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}

@Test
	public void test03() {
		XMLInputFactory factory = XMLInputFactory.newInstance();
		InputStream is = null;
		try {
			is = TestStax.class.getClassLoader().getResourceAsStream("books.xml");
			XMLStreamReader reader = factory.createXMLStreamReader(is);
			while(reader.hasNext()) {
				int type = reader.next();
				
				if(type==XMLStreamConstants.START_ELEMENT) {
					String name = reader.getName().toString();
					if(name.equals("title")) {
						System.out.print(reader.getElementText()+":");
					}
					if(name.equals("price")) {
						System.out.print(reader.getElementText()+"\n");
					}
				}
			}
		} catch (XMLStreamException e) {
			e.printStackTrace();
		} finally {
			try {
				if(is!=null) is.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}

3.2 基于迭代模型的查找

@Test
	public void test04() {
		XMLInputFactory factory = XMLInputFactory.newInstance();
		InputStream is = null;
		try {
			is = TestStax.class.getClassLoader().getResourceAsStream("books.xml");
			//基于迭代模型的操作方式
			XMLEventReader reader = factory.createXMLEventReader(is);
			int num = 0;
			while(reader.hasNext()) {
				//通过XMLEvent来获取是否是某种节点类型
				XMLEvent event = reader.nextEvent();
				if(event.isStartElement()) {
					//通过event.asxxx转换节点
					String name = event.asStartElement().getName().toString();
					if(name.equals("title")) {
						System.out.print(reader.getElementText()+":");
					}
					if(name.equals("price")) {
						System.out.print(reader.getElementText()+"\n");
					}
				}
				num++;
			}
			System.out.println(num);
		} catch (XMLStreamException e) {
			e.printStackTrace();
		} finally {
			try {
				if(is!=null) is.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}

3.3 过滤器的使用

@Test
	public void test05() {
		XMLInputFactory factory = XMLInputFactory.newInstance();
		InputStream is = null;
		try {
			is = TestStax.class.getClassLoader().getResourceAsStream("books.xml");
			//基于Filter的过滤方式，可以有效的过滤掉不用进行操作的节点，效率会高一些
			XMLEventReader reader = factory.createFilteredReader(factory.createXMLEventReader(is),
					new EventFilter() {
						@Override
						public boolean accept(XMLEvent event) {
							//返回true表示会显示，返回false表示不显示
							if(event.isStartElement()) {
								String name = event.asStartElement().getName().toString();
								if(name.equals("title")||name.equals("price"))
									return true;
							}
							return false;
						}
					});
			int num = 0;
			while(reader.hasNext()) {
				//通过XMLEvent来获取是否是某种节点类型
				XMLEvent event = reader.nextEvent();
				if(event.isStartElement()) {
					//通过event.asxxx转换节点
					String name = event.asStartElement().getName().toString();
					if(name.equals("title")) {
						System.out.print(reader.getElementText()+":");
					}
					if(name.equals("price")) {
						System.out.print(reader.getElementText()+"\n");
					}
				}
				num++;
			}
			System.out.println(num);
		} catch (XMLStreamException e) {
			e.printStackTrace();
		} finally {
			try {
				if(is!=null) is.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}

3.4 XPath的使用

@Test
	public void test06() {
		InputStream is = null;
		try {
			is = TestStax.class.getClassLoader().getResourceAsStream("books.xml");
			//创建文档处理对象
			DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
			//通过DocumentBuilder创建doc的文档对象
			Document doc = db.parse(is);
			//创建XPath
			XPath xpath = XPathFactory.newInstance().newXPath();
			//第一个参数就是xpath,第二参数就是文档
			NodeList list = (NodeList)xpath.evaluate("//book[@category='WEB']", doc,XPathConstants.NODESET);
			for(int i=0;i<list.getLength();i++) {
				//遍历输出相应的结果
				Element e = (Element)list.item(i);
				System.out.println(e.getElementsByTagName("title").item(0).getTextContent());
			}
		} catch (ParserConfigurationException e) {
			e.printStackTrace();
		} catch (SAXException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (XPathExpressionException e) {
			e.printStackTrace();
		} finally {
			try {
				if(is!=null) is.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}

3.5 使用XMLStreamWriter创建xml

@Test
	public void test07() {
		try {
			XMLStreamWriter xsw = XMLOutputFactory.newInstance().createXMLStreamWriter(System.out);
			xsw.writeStartDocument("UTF-8","1.0");
			xsw.writeEndDocument();
			String ns = "http://11:dd";
			xsw.writeStartElement("nsadfsadf","person",ns);
			xsw.writeStartElement(ns,"id");
			xsw.writeCharacters("1");
			xsw.writeEndElement();
			xsw.writeEndElement();
			xsw.flush();
			xsw.close();
		} catch (XMLStreamException e) {
			e.printStackTrace();
		} catch (FactoryConfigurationError e) {
			e.printStackTrace();
		}
	}

3.6 使用Transformer更新节点信息

@Test
	public void test08() {
		InputStream is = null;
		try {
			is = TestStax.class.getClassLoader().getResourceAsStream("books.xml");
			//创建文档处理对象
			DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
			//通过DocumentBuilder创建doc的文档对象
			Document doc = db.parse(is);
			//创建XPath
			XPath xpath = XPathFactory.newInstance().newXPath();
			Transformer tran = TransformerFactory.newInstance().newTransformer();
			tran.setOutputProperty(OutputKeys.ENCODING,"UTF-8");
			tran.setOutputProperty(OutputKeys.INDENT, "yes");
			//第一个参数就是xpath,第二参数就是文档
			NodeList list = (NodeList)xpath.evaluate("//book[title='Learning XML']", doc,XPathConstants.NODESET);
			//获取price节点
			Element be = (Element)list.item(0);
			Element e = (Element)(be.getElementsByTagName("price").item(0));
			e.setTextContent("333.9");
			Result result = new StreamResult(System.out);
			//通过tranformer修改节点
			tran.transform(new DOMSource(doc), result);
		} catch (ParserConfigurationException e) {
			e.printStackTrace();
		} catch (SAXException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (XPathExpressionException e) {
			e.printStackTrace();
		} catch (TransformerConfigurationException e) {
			e.printStackTrace();
		} catch (TransformerFactoryConfigurationError e) {
			e.printStackTrace();
		} catch (TransformerException e) {
			e.printStackTrace();
		} finally {
			try {
				if(is!=null) is.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}

转载于:https://my.oschina.net/heroShane/blog/204145