import com.sun.org.apache.xerces.internal.impl.xpath.regex.ParseException; // import the required package/class
/**
 * Parses the given XML file and registers one segment for every
 * {@code /ANNOTATION_DOCUMENT/TIER/ANNOTATION/ALIGNABLE_ANNOTATION} element.
 *
 * <p>First, every {@code /ANNOTATION_DOCUMENT/TIME_ORDER/TIME_SLOT} element is
 * indexed by its {@code TIME_SLOT_ID}, storing {@code TIME_VALUE / 1000.0}
 * (presumably milliseconds converted to seconds -- confirm against the file
 * format). Each annotation's {@code TIME_SLOT_REF1} / {@code TIME_SLOT_REF2}
 * attributes are then resolved through that index to obtain the segment start
 * and end. The annotation's {@code ANNOTATION_VALUE} text is lower-cased, runs
 * of punctuation are replaced by a space, whitespace is collapsed and the
 * result is trimmed before being handed to {@code addSegment}.
 *
 * @param file the XML document to read
 * @throws IOException if parsing the file fails, or wrapping any
 *         {@code ParseException}, {@code SAXException} or
 *         {@code XPathExpressionException} raised while processing it
 * @throws NumberFormatException if a {@code TIME_VALUE} attribute is absent
 *         or is not a valid {@code int}
 * @throws RuntimeException if an annotation references a time slot id that
 *         was not declared in the document
 */
@Override
public void read(File file) throws IOException {
    try {
        Document doc = db.parse(file);

        NodeList nlTimeSlots = (NodeList) xp.evaluate(
                "/ANNOTATION_DOCUMENT/TIME_ORDER/TIME_SLOT", doc,
                XPathConstants.NODESET);

        // The map must be parameterized: with a raw HashMap, get() returns
        // Object and cannot be assigned to the double locals below.
        HashMap<String, Double> timeSlots = new HashMap<>();
        for (int i = 0; i < nlTimeSlots.getLength(); i++) {
            Element timeSlot = (Element) nlTimeSlots.item(i);
            String id = timeSlot.getAttribute("TIME_SLOT_ID");
            int time = Integer.parseInt(timeSlot.getAttribute("TIME_VALUE"));
            timeSlots.put(id, time / 1000.0);
        }

        NodeList nlSegments = (NodeList) xp.evaluate(
                "/ANNOTATION_DOCUMENT/TIER/ANNOTATION/ALIGNABLE_ANNOTATION",
                doc, XPathConstants.NODESET);

        for (int i = 0; i < nlSegments.getLength(); i++) {
            Element elSegment = (Element) nlSegments.item(i);

            // Only non-null values are ever stored, so a null result from
            // get() means the referenced id is unknown.
            String startRef = elSegment.getAttribute("TIME_SLOT_REF1");
            Double startTime = timeSlots.get(startRef);
            if (startTime == null) {
                throw new RuntimeException("Missing time slot: " + startRef);
            }

            String endRef = elSegment.getAttribute("TIME_SLOT_REF2");
            Double endTime = timeSlots.get(endRef);
            if (endTime == null) {
                throw new RuntimeException("Missing time slot: " + endRef);
            }

            String segmentText = (String) xp.evaluate("ANNOTATION_VALUE",
                    elSegment, XPathConstants.STRING);
            // NOTE(review): toLowerCase() uses the JVM default locale, so the
            // normalized text can differ between machines (e.g. Turkish
            // locales) -- confirm whether a fixed locale is wanted.
            segmentText = segmentText.toLowerCase()
                    .replaceAll("[\\p{Punct}]+", " ")
                    .replaceAll("\\s+", " ")
                    .trim();

            // NOTE(review): the meaning of the constant first argument (0) is
            // defined by addSegment, which is not visible here.
            addSegment(0, startTime, endTime, segmentText);
        }
    } catch (ParseException | SAXException | XPathExpressionException e) {
        // Surface XML/XPath failures through the method's declared IOException,
        // keeping the original exception as the cause.
        throw new IOException(e);
    }
}