// The custom class ParseGoldRDF extends DefaultHandler and overrides its startDocument(), endDocument(), startElement(), endElement() and characters() callbacks; once parsing completes, the parsed records are imported directly into a MongoDB database.
package process;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/*
* @date 2015-11-08
*/
/**
 * SAX handler that collects the {@code <Cell>} elements of an alignment RDF file
 * (for example an OAEI {@code refalign.rdf}) as {@link Record} objects.
 *
 * <p>For every {@code <Cell>} the handler stores, in the record's attribute map:
 * <ul>
 *   <li>{@code entity1} / {@code entity2}: the fragment (text after {@code #}) of the
 *       element's resource attribute;</li>
 *   <li>{@code relation}: the text content of the {@code <relation>} element;</li>
 *   <li>{@code type}: {@code "class"} when the {@code entity1} fragment starts with an
 *       upper-case letter, {@code "prop"} otherwise.</li>
 * </ul>
 *
 * <p>The collected records can either be turned into two lookup maps
 * ({@link #parseGD}) or handed to {@code OperateDB.WriteDB} ({@link #parseDocument}).
 * An instance keeps per-parse state in fields, so one instance must not be used for
 * two parses at the same time.
 */
public class ParseGoldRDF extends DefaultHandler {
    private static final String CELL = "Cell";
    private static final String ENTITY1 = "entity1";
    private static final String ENTITY2 = "entity2";
    private static final String RELATION = "relation";
    private static final String TYPE = "type";
    private static final String TYPE_CLASS = "class";
    private static final String TYPE_PROP = "prop";
    private static final String RESOURCE_ATTR = "rdf:resource";

    /** Database helper used by {@link #parseDocument(String)}. */
    private final OperateDB db;
    /** Records collected during the current (or most recent) parse; never null. */
    private List<Record> rcdLst = new ArrayList<Record>();
    /** Text of the element currently being read; SAX may deliver it in several chunks. */
    private final StringBuilder text = new StringBuilder();
    /** True while the parser is between {@code <Cell>} and {@code </Cell>}. */
    private boolean insideCell = false;
    /** Scratch map holding the attributes of the cell currently being read. */
    Map<String, Object> mapKeyValue = new HashMap<String, Object>();

    /**
     * @param db database helper that receives the parsed records in
     *           {@link #parseDocument(String)}; not used by {@link #parseGD}
     */
    public ParseGoldRDF(OperateDB db) {
        this.db = db;
    }

    /** Starts a new document: discards everything left over from a previous parse. */
    @Override
    public void startDocument() throws SAXException {
        super.startDocument();
        resetState();
    }

    /** End of document; nothing to do beyond the default behaviour. */
    @Override
    public void endDocument() throws SAXException {
        super.endDocument();
    }

    /**
     * Handles an opening tag.
     *
     * @param qName      qualified name of the element being opened
     * @param attributes attributes of that element
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);
        if (CELL.equals(qName)) {
            insideCell = true;
            mapKeyValue.clear();
        }
        if (!insideCell) {
            return;
        }
        // Every element starts with an empty text buffer so that text of a previous
        // element (or inter-element whitespace) cannot leak into this one.
        text.setLength(0);
        if (ENTITY1.equals(qName) || ENTITY2.equals(qName)) {
            // Prefer the rdf:resource attribute; fall back to the first attribute.
            // Attributes.getValue(int) returns null when the element has no attributes.
            String resource = attributes.getValue(RESOURCE_ATTR);
            if (resource == null) {
                resource = attributes.getValue(0);
            }
            String fragment = fragmentOf(resource);
            if (fragment != null) {
                mapKeyValue.put(qName, fragment);
            }
        }
    }

    /**
     * Accumulates character data of the current element.
     *
     * @param ch     buffer holding the characters
     * @param start  index of the first character in {@code ch}
     * @param length number of characters to read
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        super.characters(ch, start, length);
        if (!insideCell) {
            return;
        }
        // Append instead of overwrite: a parser may split one text node into several
        // characters() callbacks (typically around entity references such as &lt;).
        text.append(ch, start, length);
    }

    /**
     * Handles a closing tag. Dispatches on the element actually being closed
     * ({@code qName}) rather than on the most recently opened element.
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        super.endElement(uri, localName, qName);
        if (!insideCell) {
            return;
        }
        switch (qName) {
        case CELL:
            finishCell();
            break;
        case RELATION:
            mapKeyValue.put(RELATION, text.toString());
            break;
        default:
            break;
        }
    }

    /**
     * Parses the alignment file and fills the two maps with
     * {@code entity1 -> entity2} pairs, split by the record's {@code type}.
     * Both maps are cleared first. Parse errors are reported on standard output and
     * whatever was read before the error is still returned in the maps.
     *
     * @param fileName   location of the alignment file (passed to the SAX parser as a URI)
     * @param alignClass receives the pairs whose type is {@code "class"}
     * @param alignProp  receives all other pairs
     */
    public void parseGD(String fileName,
            Map<String, String> alignClass, Map<String, String> alignProp) throws Exception {
        alignClass.clear();
        alignProp.clear();
        runParser(fileName);
        for (Record record : rcdLst) {
            Map<String, Object> attrs = record.getAttrKeyValue();
            if (attrs == null) {
                continue;
            }
            Object type = attrs.get(TYPE);
            Object entity1 = attrs.get(ENTITY1);
            Object entity2 = attrs.get(ENTITY2);
            if (type == null || entity1 == null || entity2 == null) {
                // An incomplete cell cannot form a pair; skip it instead of failing.
                continue;
            }
            if (TYPE_CLASS.equals(type.toString())) {
                alignClass.put(entity1.toString(), entity2.toString());
            } else {
                alignProp.put(entity1.toString(), entity2.toString());
            }
        }
    }

    /**
     * Parses the alignment file and passes the collected records to
     * {@code db.WriteDB(records, false)}. Parse errors are reported on standard
     * output; the records read before the error (possibly none) are still written.
     *
     * @param fileName location of the alignment file (passed to the SAX parser as a URI)
     */
    public void parseDocument(String fileName) {
        runParser(fileName);
        db.WriteDB(rcdLst, false);
    }

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the benchmark root directory (must end
     *             with a path separator), {@code args[1]} the benchmark id; the
     *             built-in defaults are used for missing arguments
     */
    public static void main(String[] args) throws Exception {
        String rootPath = args.length > 0 ? args[0]
                : "E:\\01-My Papers\\08Alignment of Graphical Linked Data in Semantic Web\\data\\OAEI2010\\benchmarks\\";
        String objName = args.length > 1 ? args[1] : "304";
        String nameColl = "C" + objName + "GD";
        OperateDB db = new OperateDB("OAEI2010", nameColl);
        String fileGD = rootPath + objName + "\\refalign.rdf";
        ParseGoldRDF handler = new ParseGoldRDF(db);
        handler.parseDocument(fileGD);
        System.out.println("finish parsing " + objName + "GD.rdf");
    }

    /** Clears all per-parse state so results of an earlier parse cannot be reused. */
    private void resetState() {
        rcdLst = new ArrayList<Record>();
        mapKeyValue.clear();
        text.setLength(0);
        insideCell = false;
    }

    /** Runs the SAX parser over {@code fileName} with this object as handler. */
    private void runParser(String fileName) {
        // Reset here as well: if the parser fails before startDocument() is called,
        // the record list must be empty rather than null or left over from last time.
        resetState();
        SAXParserFactory factory = SAXParserFactory.newInstance();
        try {
            SAXParser parser = factory.newSAXParser();
            parser.parse(fileName, this);
        } catch (ParserConfigurationException e) {
            System.out.println("ParserConfig error: " + e.getMessage());
        } catch (SAXException e) {
            System.out.println("SAXException: xml not well formed: " + e.getMessage());
        } catch (IOException e) {
            System.out.println("IO error: " + e.getMessage());
        }
    }

    /** Turns the attributes gathered for the current cell into a record. */
    private void finishCell() {
        insideCell = false;
        Object entity1 = mapKeyValue.get(ENTITY1);
        if (!(entity1 instanceof String) || ((String) entity1).isEmpty()) {
            // Without entity1 the cell cannot be classified; report and skip it.
            System.out.println("Skipping Cell without a usable entity1 resource");
            mapKeyValue.clear();
            return;
        }
        // Naming convention of the data: upper-case initial means class, else property.
        boolean isClass = Character.isUpperCase(((String) entity1).charAt(0));
        mapKeyValue.put(TYPE, isClass ? TYPE_CLASS : TYPE_PROP);
        Record record = new Record();
        // Hand over a copy: mapKeyValue is reused (and cleared) for the next cell, so
        // the record must not share it in case Record keeps the reference it is given.
        record.setAttrKeyValue(new HashMap<String, Object>(mapKeyValue));
        rcdLst.add(record);
        mapKeyValue.clear();
    }

    /**
     * Returns the text between the first {@code #} of {@code iri} and the next
     * {@code #} (or the end of the string), or null when there is no such non-empty text.
     */
    private static String fragmentOf(String iri) {
        if (iri == null) {
            return null;
        }
        int begin = iri.indexOf('#');
        if (begin < 0) {
            return null;
        }
        int end = iri.indexOf('#', begin + 1);
        String fragment = end < 0 ? iri.substring(begin + 1) : iri.substring(begin + 1, end);
        return fragment.isEmpty() ? null : fragment;
    }
}