文章目录
1、读取XML文档
1.1、使用DOM解析
-
节点类型: ELEMENT_NODE, ATTRIBUTE_NODE, TEXT_NODE
-
java解析xml文件
-
// Parse an XML document with the plain Java (JAXP) DOM API.
import javax.xml.parsers.DocumentBuilderFactory

// Brings ELEMENT_NODE, ATTRIBUTE_NODE and TEXT_NODE into scope for info().
import static org.w3c.dom.Node.*

def fac = DocumentBuilderFactory.newInstance()
def builder = fac.newDocumentBuilder()
// withInputStream closes the file stream once the document has been parsed.
def doc = new File("data/plan.xml").withInputStream { builder.parse(it) }
def plan = doc.documentElement

/**
 * Returns a one-line description of a DOM node based on its node type.
 *
 * @param node a DOM node
 * @return "element: NAME", "attribute: NAME=VALUE" or "text: VALUE" for the
 *         three handled node types, otherwise "some other type: TYPE"
 */
String info(node) {
    switch (node.nodeType) {
        case ELEMENT_NODE:
            return "element: $node.nodeName"
        case ATTRIBUTE_NODE:
            return "attribute: $node.nodeName=$node.nodeValue"
        case TEXT_NODE:
            return "text: $node.nodeValue"
    }
    return "some other type: $node.nodeType"
}

assert info(plan) == 'element: plan'
// The whitespace around the two <week> elements is reported as text nodes,
// so the root has five children, not two.
assert plan.childNodes.length == 5

// Find the first child whose nodeName is 'week'.
def firstWeek = plan.childNodes.find { it.nodeName == 'week' }
assert info(firstWeek) == 'element: week'

// item(0) is the whitespace text node in front of the first <task>.
def firstTask = firstWeek.childNodes.item(1)
assert info(firstTask) == 'element: task'

def firstTaskText = firstTask.childNodes.item(0)
assert info(firstTaskText) == 'text: easy'

def firstTaskTitle = firstTask.attributes.getNamedItem('title')
assert info(firstTaskTitle) == 'attribute: title=read XML chapter'

// An attribute node carries its value as a child text node.
def firstTaskTitleText = firstTaskTitle.childNodes.item(0)
assert info(firstTaskTitleText) == 'text: read XML chapter'
-
<plan>
    <week capacity="8">
        <task done="2" total="2" title="read XML chapter">easy</task>
        <task done="3" total="3" title="try some reporting">fun</task>
        <task done="1" total="2" title="use in current project"/>
    </week>
    <week capacity="8">
        <task done="0" total="1" title="re-read DB chapter"/>
        <task done="0" total="3" title="use DB/XML combination"/>
    </week>
</plan>
-
1.2、使用groovy解析
-
XmlSlurper只是将XML解析成GPath,不需要额外的内存空间;而XmlParser需要存储中间结果,所以需要额外的内存空间
-
package com.jack.groovy.ch14

import groovy.xml.DOMBuilder
import groovy.xml.XmlParser
import groovy.xml.XmlSlurper
import groovy.xml.dom.DOMCategory

/**
 * Reads data/plan.xml three ways: DOM + DOMCategory, XmlParser and XmlSlurper.
 *
 * @author liangchen
 * @date 2020/12/1
 */

// DOM + DOMCategory: inside use(DOMCategory) the DOM tree can be navigated with
// GPath expressions; '@name' reads an attribute, e.g. '@capacity'.
// withReader closes the file once the document has been parsed.
def doc = new File('data/plan.xml').withReader { DOMBuilder.parse(it) }
def plan = doc.documentElement
use(DOMCategory) {
    assert plan.name() == 'plan'
    assert plan.week[0].name() == 'week'
    assert plan.week[0].'@capacity' == '8'
    assert plan.week.task[0].name() == 'task'
    assert plan.week.task[0].text() == 'easy'
}

// Groovy's XmlParser: node.@attributeName reads an attribute value.
plan = new XmlParser().parse(new File('data/plan.xml'))
assert plan.name() == 'plan'
assert plan.week[0].name() == 'week'
def firstTask = plan.week[0].task[0]
assert firstTask.name() == 'task'
assert firstTask.text() == 'easy'
assert firstTask.@title == 'read XML chapter'

// XmlSlurper
plan = new XmlSlurper().parse(new File('data/plan.xml'))
assert plan.week.task.size() == 5
// Sum an attribute over all tasks.
assert plan.week.task.@done*.toInteger().sum() == 6
assert plan.week[1].task.every { it.@done == '0' }
// Breadth-first traversal
assert plan.breadthFirst()*.name().join('->') == 'plan->week->week->task->task->task->task->task'
// Depth-first traversal
assert plan.depthFirst()*.name().join('->') == 'plan->week->task->task->task->week->task->task'
// '**' is shorthand for depthFirst()
assert plan.depthFirst()*.name() == plan.'**'*.name()
1.3、使用SAX解析
-
事件驱动解析(event based parsing)
-
package com.jack.groovy.ch14

import org.xml.sax.Attributes
import org.xml.sax.InputSource
import org.xml.sax.helpers.DefaultHandler

import javax.xml.parsers.SAXParserFactory

/**
 * SAX handler (extends DefaultHandler) that sorts the titles of task elements
 * into "upcoming" and "underway" lists based on their done/total attributes.
 *
 * @author liangchen
 * @date 2020/12/3
 */
class PlanHandler extends DefaultHandler {
    def underway = []
    def upcoming = []

    /**
     * Called for every opening tag; only task elements are classified.
     *
     * @param namespace namespace URI reported by the parser
     * @param localName local name reported by the parser
     * @param qName     qualified tag name
     * @param attrs     attributes of the element
     */
    void startElement(String namespace, String localName, String qName, Attributes attrs) {
        print namespace + "==" + localName + "==" + qName + "==" + attrs
        if (qName != 'task') {
            return
        }
        def title = attrs.getValue('title')
        def total = attrs.getValue('total')
        def done = attrs.getValue('done')
        if (done == '0') {
            // nothing done yet
            upcoming << title
        } else if (done != total) {
            // started but not finished
            underway << title
        }
    }
}

// Our own content handler.
def handler = new PlanHandler()
// Factory -> parser -> XML reader.
def factory = SAXParserFactory.newInstance()
def reader = factory.newSAXParser().getXMLReader()
// Register the handler that receives the parse events.
reader.setContentHandler(handler)

new File('data/plan.xml').withInputStream { is ->
    reader.parse(new InputSource(is))
}

assert handler.underway == ['use in current project']
assert handler.upcoming == ['re-read DB chapter', 'use DB/XML combination']
1.4、使用StAX解析文档(Streaming API For XML)
-
package com.jack.groovy.ch14

import javax.xml.stream.XMLInputFactory
import javax.xml.stream.XMLStreamReader

/**
 * Classifies the tasks of data/plan.xml with the StAX pull parser.
 *
 * @author liangchen
 * @date 2020/12/3
 */
def input = 'file:data/plan.xml'.toURL()
def underway = []
def upcoming = []

/**
 * Walks the XML stream and passes the stream reader to the closure every time
 * it is positioned on a start element. The reader and the stream are closed
 * when the loop ends, normally or not.
 *
 * @param inputStream stream containing the XML document
 * @param yield       closure invoked with the XMLStreamReader per start element
 */
def eachStartElement(inputStream, Closure yield) {
    def reader = XMLInputFactory.newInstance().createXMLStreamReader(inputStream)
    try {
        while (reader.hasNext()) {
            if (reader.startElement) {
                yield.call(reader)
            }
            reader.next()
        }
    } finally {
        if (reader != null) {
            reader.close()
        }
        if (inputStream != null) {
            inputStream.close()
        }
    }
}

/** Category that lets attribute values of the current element be read as properties. */
class XMLStreamCategory {
    static Object get(XMLStreamReader self, String key) {
        self.getAttributeValue(null, key)
    }
}

// With the category active, element.done / element.total / element.title are
// answered by XMLStreamCategory.get.
use(XMLStreamCategory) {
    eachStartElement(input.openStream()) { element ->
        if (element.name.toString() != 'task') {
            return
        }
        def done = element.done
        if (done == '0') {
            upcoming << element.title
        } else if (done != element.total) {
            underway << element.title
        }
    }
}

assert underway == ['use in current project']
assert upcoming == ['re-read DB chapter', 'use DB/XML combination']
2、处理XML
2.1、就地处理(In-place processing)
-
根据XML产生另一种格式文件(HTML报告)
-
可以在节点增加字段属性,如下面增加status字段
-
package com.jack.groovy.ch14

import groovy.xml.MarkupBuilder
import groovy.xml.XmlParser

/**
 * Builds an HTML progress report from data/plan.xml by enriching the parsed
 * node tree in memory and then rendering it with MarkupBuilder.
 *
 * @author liangchen
 * @date 2020/12/4
 */

/**
 * Recursively converts the string attributes capacity, total and done of a
 * node and its child nodes to integers.
 *
 * @param node root of the (sub)tree to convert
 */
void numberfy(Node node) {
    def atts = node.attributes()
    atts.keySet().grep(['capacity', 'total', 'done']).each {
        atts[it] = atts[it].toInteger()
    }
    node.each {
        // children may also be plain text, which has no attributes
        if (it instanceof Node) numberfy(it)
    }
}

/**
 * Adds a status attribute to a task node, derived from done and total:
 * 'scheduled' (done == 0), 'in progress' (0 < done < total) or 'finished'.
 *
 * @param task task node whose done/total attributes are already integers
 */
void taskStatus(task) {
    def atts = task.attributes()
    switch (atts.done) {
        case 0:
            atts.status = 'scheduled'
            break
        case 1..<atts.total:
            atts.status = 'in progress'
            break
        default:
            atts.status = 'finished'
    }
}

/**
 * Sets the status of every task in the week and then the status of the week
 * itself: 'scheduled' by default, 'finished' when all tasks are finished,
 * 'in progress' when any task is in progress.
 *
 * @param week week node
 */
void weekStatus(week) {
    week.task.each { taskStatus(it) }
    def atts = week.attributes()
    atts.status = 'scheduled'
    if (week.task.every { it.@status == 'finished' }) {
        atts.status = 'finished'
    }
    if (week.task.any { it.@status == 'in progress' }) {
        atts.status = 'in progress'
    }
}

/**
 * Renders the enriched plan as HTML: one heading per week followed by a
 * definition list of its tasks.
 *
 * @param builder markup builder to write to
 * @param plan    plan node whose weeks and tasks carry a status attribute
 */
void htmlReport(builder, plan) {
    builder.html {
        head {
            title('Current Groovy progress')
            link(rel:'stylesheet', type: 'text/css', href :'style.css')
        }
        body {
            // eachWithIndex passes the week node and its zero-based position
            plan.week.eachWithIndex { week, i ->
                h1("Week No. $i:${week.@status}")
                // dt/dd belong inside an HTML definition list <dl>
                dl {
                    week.task.each { task ->
                        dt(class: task.@status, task.@title)
                        dd("(${task.@done}/${task.@total}): ${task.@status}")
                    }
                }
            }
        }
    }
}

def node = new XmlParser().parse(new File('data/plan.xml'))
numberfy(node)
node.week.each { weekStatus(it) }
new File('data/GroovyPlans.html').withWriter { writer ->
    def builder = new MarkupBuilder(writer)
    htmlReport(builder, node)
}
2.2、流处理
-
懒加载
-
package com.jack.groovy.ch14

import groovy.xml.StreamingMarkupBuilder
import groovy.xml.XmlSlurper

/**
 * Streamed processing of data/plan.xml with XmlSlurper and
 * StreamingMarkupBuilder.
 *
 * @author liangchen
 * @date 2020/12/4
 */

// Copy the document unmodified through the streaming builder.
def path = new XmlSlurper().parse(new File('data/plan.xml'))
def builder = new StreamingMarkupBuilder()
def copier = builder.bind { mkp.yield(path) }
def result = "$copier"
// The copy has no whitespace between the tags.
assert result.startsWith('<plan><week')
assert result.endsWith('</week></plan>')
// Print to the console.
System.out << copier

// Transform the XML into HTML while streaming.

/**
 * Derives a task's status from its done and total attributes.
 *
 * @param task task element from XmlSlurper
 * @return 'scheduled' (done == 0), 'in progress' (0 < done < total) or 'finished'
 */
def taskStatus(task) {
    // toInteger() must be a method call: without the parentheses it is a
    // property lookup that never matches a number, so every task would be
    // reported as 'finished'.
    switch (task.@done.toInteger()) {
        case 0:
            return 'scheduled'
        case 1..<task.@total.toInteger():
            return 'in progress'
        default:
            return 'finished'
    }
}

/**
 * Derives a week's status from the status of its tasks.
 *
 * @param week week element from XmlSlurper
 * @return 'finished' when every task is finished, otherwise 'in progress'
 *         when any task is in progress, otherwise 'scheduled'
 */
def weekStatus(week) {
    if (week.task.every { taskStatus(it) == 'finished' }) {
        return 'finished'
    }
    if (week.task.any { taskStatus(it) == 'in progress' }) {
        return 'in progress'
    }
    return 'scheduled'
}

def plan = new XmlSlurper().parse(new File('data/plan.xml'))
Closure markup = {
    html {
        head {
            title('Current Groovy progress')
            link(rel:'stylesheet', type: 'text/css', href: 'style.css')
        }
        body {
            plan.week.eachWithIndex { week, i ->
                // owner.* routes the call to the script's methods, not to the builder
                h1("Week No. $i: ${owner.weekStatus(week)}")
                // dt/dd belong inside an HTML definition list <dl>
                dl {
                    week.task.each { task ->
                        def status = owner.taskStatus(task)
                        dt(class: status, task.@title)
                        dd("(${task.@done}/${task.@total}): $status")
                    }
                }
            }
        }
    }
}
def heater = new StreamingMarkupBuilder().bind(markup)
def outfile = new File('data/StreamedGroovyPlans.xml')
outfile.withWriter { it << heater }
2.3、更新XML
-
更新xml文档
-
package com.jack.groovy.ch14

import groovy.xml.DOMBuilder
import groovy.xml.XmlParser
import groovy.xml.XmlUtil
import groovy.xml.dom.DOMCategory

/**
 * Updates data/plan.xml in memory through the DOM and verifies the result.
 *
 * @author liangchen
 * @date 2020/12/4
 */

// Verifies that a serialized plan contains the expected updates.
class UpdateChecker {
    /**
     * Parses the given XML text and asserts the expected state of both weeks.
     *
     * @param text serialized plan document
     */
    static check(text) {
        def updated = new XmlParser().parseText(text)
        updated.week[0].with { w0 ->
            // the done values of the first week must add up to 7
            assert w0.task.@done*.toInteger().sum() == 7
            assert w0.find { it.text() == 'time saver' }
        }
        updated.week[1].with { w1 ->
            assert w1.children().size() == 2
            assert w1.find { it.@total == '1' }
            assert w1.find { it.@title == "build web service client" }
            assert !w1.find { it.@title == 'use DB/XML combination' }
        }
    }
}

// Modify the DOM. withReader closes the file once the document has been parsed.
def doc = new File('data/plan.xml').withReader { DOMBuilder.parse(it) }
def plan = doc.documentElement
use(DOMCategory) {
    // third task of the first week: set done and the text content
    plan.week[0].task[2]['@done'] = '2'
    plan.week[0].task[2].value = 'time saver'
    // replace the second task of the second week
    plan.week[1].task[1].replaceNode {
        task(done:'0', total:'1', title:'build web service client')
    }
}
UpdateChecker.check(XmlUtil.serialize(plan))
-
使用 XmlParser 和 XmlSlurper 修改文档
-
package com.jack.groovy.ch14

import groovy.xml.XmlParser
import groovy.xml.XmlSlurper
import groovy.xml.XmlUtil

/**
 * Applies the same updates to data/plan.xml twice: once on an XmlParser tree
 * and once on an XmlSlurper result.
 *
 * NOTE(review): both variants leave three tasks in the second week (one
 * replaced, one added), while the UpdateChecker shown with the DOM example
 * asserts exactly two children and a title that the DOM example puts in the
 * replaced task - confirm which checker version this listing is meant to run
 * against.
 *
 * @author liangchen
 * @date 2020/12/4
 */

// --- XmlParser ---
def plan = new XmlParser().parse(new File('data/plan.xml'))
// third task of the first week: set done to 2 and the text to 'time saver'
plan.week[0].task[2].@done = '2'
plan.week[0].task[2].value = 'time saver'
// replace the second task of the second week
plan.week[1].task[1].replaceNode {
    task(done:'0', total:'4', title:'build web service')
}
// add a new task after the second task
plan.week[1].task[1] + {
    task(done:'0', total:'1', title:'build web service client')
}
UpdateChecker.check(XmlUtil.serialize(plan))

// --- XmlSlurper ---
// The edits must target the slurped document itself; previously this half
// modified and serialized the XmlParser tree a second time and never used
// the XmlSlurper result.
def slurper = new XmlSlurper().parse(new File('data/plan.xml'))
slurper.week[0].task[2].@done = '2'
// On a GPathResult the element body is replaced by assigning to the element;
// there is no 'value' property as on groovy.util.Node.
slurper.week[0].task[2] = 'time saver'
slurper.week[1].task[1].replaceNode {
    task(done:'0', total:'4', title:'build web service')
}
slurper.week[1].task[1] + {
    task(done:'0', total:'1', title:'build web service client')
}
UpdateChecker.check(XmlUtil.serialize(slurper))
2.4、结合XPath使用
-
xpath使用
-
package com.jack.groovy.ch14

import groovy.xml.DOMBuilder
import groovy.xml.dom.DOMCategory

import javax.xml.xpath.XPathConstants
import javax.xml.xpath.XPathFactory

/**
 * Listing 14.14: uses XPath to sum the total and done attributes per week
 * and prints a short text report.
 *
 * @author liangchen
 * @date 2020/12/4
 */

// withReader closes the file once the document has been parsed.
def doc = new File('data/plan.xml').withReader { DOMBuilder.parse(it) }
def plan = doc.documentElement
def xpath = XPathFactory.newInstance().newXPath()
def out = new StringBuilder()

use(DOMCategory) {
    xpath.evaluate('//week', plan, XPathConstants.NODESET).eachWithIndex { wk, i ->
        out << "\nWeek No. $i\n"
        // The NUMBER result is assigned to an int, dropping the fraction.
        int total = xpath.evaluate('sum(task/@total)', wk, XPathConstants.NUMBER)
        int done = xpath.evaluate('sum(task/@done)', wk, XPathConstants.NUMBER)
        out << " planned $total of ${wk.'@capacity'}\n"
        out << " done $done of $total"
    }
}
System.out << out
-
package com.jack.groovy.ch14

import groovy.xml.DOMBuilder
import groovy.xml.dom.DOMCategory

import javax.xml.xpath.XPathConstants
import javax.xml.xpath.XPathFactory

import groovy.text.SimpleTemplateEngine as STE

/**
 * Collects per-week totals with XPath and renders them through a template
 * into data/XPathGroovyPlans.html.
 *
 * @author liangchen
 * @date 2020/12/4
 */

// withReader closes the file once the document has been parsed.
def doc = new File('data/plan.xml').withReader { DOMBuilder.parse(it) }
def plan = doc.documentElement
def xpath = XPathFactory.newInstance().newXPath()

// Template model: a scale factor and one map (total, done, capacity) per week.
def binding = [scale: 1, weeks: []]
use(DOMCategory) {
    xpath.evaluate('//week', plan, XPathConstants.NODESET).each { week ->
        binding.weeks << [
            total   : (int) xpath.evaluate('sum(task/@total)', week, XPathConstants.NUMBER),
            done    : (int) xpath.evaluate('sum(task/@done)', week, XPathConstants.NUMBER),
            capacity: week.'@capacity'.toInteger()
        ]
    }
}

// scale = 200 divided by the largest capacity; left at 1 when there is no
// positive capacity.
def max = binding.weeks.capacity.max()
if (max > 0) binding.scale = 200.intdiv(max)

def templateFile = new File('data/GroovyPlans.template.html')
def template = new STE().createTemplate(templateFile)
new File('data/XPathGroovyPlans.html').withWriter {
    it << template.make(binding)
}
3、解析和建立JSON
3.1、解析JSON
-
package com.jack.groovy.ch14

import groovy.json.JsonSlurper

/**
 * Parses data/plan.json with JsonSlurper and checks a few values.
 *
 * @author liangchen
 * @date 2020/12/5
 */
def plan = new JsonSlurper().parse(new File('data/plan.json'))

// The parsed result is navigated with ordinary property and index access.
def weeks = plan.weeks
assert weeks[0].tasks[0].status == 'easy'
assert weeks[1].capacity == 8
assert weeks[1].tasks[0].title == 're-read DB chapter'
3.2、构建JSON
package com.jack.groovy.ch14
import groovy.json.JsonBuilder
/**
* @author liangchen* @date 2020/12/5
*/
// Build a JSON document with a "weeks" object holding a capacity and two
// tasks, then print it.
def builder = new JsonBuilder()
builder.weeks {
    capacity('8')
    tasks([
        {
            done('0')
            total('4')
            title('build web service')
        },
        {
            done('0')
            total('1')
            title('build web service client')
        }
    ])
}
println(builder.toString())
// Builder call driven by a collection: the closure is given each value of
// the range 1..3 as the day number.
def invoiceBuilder = new JsonBuilder()
invoiceBuilder.call {
    invoices(1..3) { dayNumber ->
        invoice(date: "2015-01-0${dayNumber}") {
            item(count: dayNumber) {
                product(name: 'ULC', dollar: 1499)
            }
        }
    }
}
println(invoiceBuilder.toString())
import static groovy.json.JsonOutput.*
// Serialize a map to a JSON string.
def json = toJson([date: '2015-01-01', time: '6 am'])
assert json == '{"date":"2015-01-01","time":"6 am"}'

/** Simple bean with a first and a last name. */
class Athlete {
    String first
    String last
}

// Serialize a plain object to a JSON string.
def mj = new Athlete(first: 'Michael', last: 'Jordan')
assert toJson(mj) == '{"first":"Michael","last":"Jordan"}'

// Serialize a map of objects and pretty-print the result.
def pt = new Athlete(first: 'Paul', last: 'Tergat')
def athletes = [basketball: mj, marathon: pt]
json = toJson(athletes)
System.out << prettyPrint(json)