这当然可行，但需要费一些功夫。您可以创建一个访问者（visitor），让它跟踪自己在树中的位置，并在遍历过程中累积文本和各实例的偏移量。不过，该方案也可以直接用 SAX 处理程序实现，而且速度会快得多。
下面的代码可以作为一个起点：
/**
 * SAX handler that prints, for every {@code sentence} element, its id, the
 * character offsets of each {@code instance} element inside it, and finally
 * the sentence's accumulated text.
 *
 * <p>Offsets are positions in the text collected since the sentence started,
 * so an instance's {@code start}/{@code end} pair delimits its text within
 * the printed {@code s.text} value.
 */
public class Main extends DefaultHandler {
    /** Text collected so far for the sentence currently being read. */
    StringBuilder buf = new StringBuilder();

    /** True while the parser is between a sentence's start and end tags. */
    boolean collecting = false;

    /** Number of instance elements opened so far; reset at each sentence start. */
    int ic = 0;

    /** Numbers of the instance elements that are currently open, innermost first. */
    private final java.util.Deque<Integer> openInstances = new java.util.ArrayDeque<>();

    /**
     * Starts a new sentence (resetting all per-sentence state) or reports the
     * id and start offset of an instance.
     *
     * @param uri        namespace URI of the element (unused)
     * @param localName  local name; empty when the parser is not namespace-aware
     * @param qName      qualified name, used when {@code localName} is empty
     * @param attributes attributes of the element; the {@code id} value is printed
     * @throws SAXException declared by the overridden method; not thrown here
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        String name = elementName(localName, qName);
        if ("sentence".equals(name)) {
            System.out.printf("s.id=%s\n", attributes.getValue("id"));
            collecting = true;
            buf.setLength(0);
            ic = 0;
            openInstances.clear();
        } else if ("instance".equals(name)) {
            ++ic;
            // Remember the number so the matching end tag reports the same one.
            openInstances.push(ic);
            System.out.printf("i%d.id=%s\n", ic, attributes.getValue("id"));
            System.out.printf("i%d.start=%s\n", ic, buf.length());
        }
    }

    /**
     * Prints the collected text when a sentence closes, or the end offset
     * when an instance closes.
     *
     * @param uri       namespace URI of the element (unused)
     * @param localName local name; empty when the parser is not namespace-aware
     * @param qName     qualified name, used when {@code localName} is empty
     * @throws SAXException declared by the overridden method; not thrown here
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        String name = elementName(localName, qName);
        if ("sentence".equals(name)) {
            collecting = false;
            System.out.printf("s.text=%s\n", buf.toString());
        } else if ("instance".equals(name)) {
            // Use the number assigned at the start tag: with nested instances
            // the running counter already belongs to a later element.
            Integer opened = openInstances.poll();
            // Fall back to the counter if the stack was reset by a sentence
            // start in between.
            int number = (opened != null) ? opened : ic;
            System.out.printf("i%d.end=%s\n", number, buf.length());
        }
    }

    /**
     * Appends character data to the sentence buffer while inside a sentence.
     *
     * @param ch     array holding the characters
     * @param start  index of the first character to use
     * @param length number of characters to use
     * @throws SAXException declared by the overridden method; not thrown here
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (collecting) {
            buf.append(ch, start, length);
        }
    }

    /** Returns the local name, or the qualified name when no local name was supplied. */
    private static String elementName(String localName, String qName) {
        return (localName == null || localName.isEmpty()) ? qName : localName;
    }

    /**
     * Parses the {@code data.xml} resource with a namespace-aware SAX parser
     * and this handler, printing the results to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the resource is missing or cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        SAXParserFactory f = SAXParserFactory.newInstance();
        f.setNamespaceAware(true);
        // Close the stream when parsing ends, whether it succeeds or fails.
        try (java.io.InputStream in = Main.class.getResourceAsStream("data.xml")) {
            if (in == null) {
                throw new java.io.FileNotFoundException(
                        "Resource data.xml was not found for " + Main.class.getName());
            }
            f.newSAXParser().parse(in, new Main());
        }
    }
}