这有点棘手,因为您必须对 DOM 进行深度优先遍历。NodeTraversor 可以帮您做到这一点。
这是一个例子:
package stuff;
import java.util.ArrayDeque;
import java.util.Deque;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
/**
 * Example: collect the markup of a page that precedes the {@code <a id="summary">}
 * anchor by walking the parsed body depth-first with a {@link NodeTraversor}.
 */
public class A {

    /** Sample page up to and including the opening {@code <body>} tag. */
    private static final String PAGE_START =
            "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" +
            "<html xmlns=\"http://www.w3.org/1999/xhtml\">" +
            "<head>" +
            "<title>TestNG: Unit Test</title>" +
            "</head>" +
            "<body>";

    /** The anchor that marks where extraction stops. */
    private static final String SUMMARY_ANCHOR = "<a id=\"summary\"></a>";

    /** Sample page content that follows the anchors, through the closing {@code </html>}. */
    private static final String PAGE_END =
            "<table cellspacing=0 cellpadding=0 class=\"param\" style=\"float: left; width:630px;\">" +
            "<tr><th>Test</th><th class=\"numi\">Methods<br/>Passed</th><th class=\"numi\">Scenarios<br/>Passed</th><th class=\"numi\"># skipped</th><th class=\"numi\"># failed</th><th class=\"numi\">Total<br/>Time</th><th class=\"numi\">Included<br/>Groups</th><th class=\"numi\">Excluded<br/>Groups</th></tr>" +
            "</table>" +
            "</body>" +
            "</html>";

    /**
     * Runs {@link #parse(String)} on two sample pages and prints each result:
     * one where the summary anchor is the first body element, and one where
     * another anchor precedes it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String html = PAGE_START + SUMMARY_ANCHOR + PAGE_END;
        System.out.println(parse(html));

        String html2 = PAGE_START + "<a id=\"something_else\"></a>" + SUMMARY_ANCHOR + PAGE_END;
        System.out.println(parse(html2));
    }

    /**
     * Returns the inner HTML of the document head, followed by a literal
     * {@code <body>} tag, followed by the markup of every body element that is
     * completely closed before the {@code <a id="summary">} anchor is reached.
     *
     * <p>Only elements are collected; text nodes that are direct children of the
     * body are not emitted on their own. Ancestors of the summary anchor are not
     * emitted either, so the result is a fragment and may lack enclosing tags.
     * If the document has no summary anchor, all body elements are returned.
     *
     * @param html the HTML source to parse
     * @return the collected fragment
     */
    public static String parse(String html) {
        Document document = Jsoup.parse(html);
        final StringBuilder buffer = new StringBuilder();
        buffer.append(document.head().html());
        buffer.append("<body>");

        NodeTraversor nd = new NodeTraversor(new NodeVisitor() {
            /** Set once the summary anchor has been visited; nothing is collected afterwards. */
            private boolean finished = false;

            /** Buffer length recorded when each currently open element was entered. */
            private final Deque<Integer> startOffsets = new ArrayDeque<Integer>();

            @Override
            public void head(Node node, int depth) {
                if (node instanceof Element) {
                    startOffsets.push(buffer.length());
                }
            }

            @Override
            public void tail(Node node, int depth) {
                if (!(node instanceof Element)) {
                    return;
                }
                int start = startOffsets.pop();
                // depth 0 is the traversal root (the body itself); its opening tag
                // was already written by hand, so it must never be appended.
                if (finished || depth == 0) {
                    return;
                }
                Element element = (Element) node;
                if ("a".equals(element.tagName()) && "summary".equals(element.attr("id"))) {
                    finished = true;
                    return;
                }
                // tail() is post-order: the descendants of this element were already
                // appended. Drop them before appending the element's own markup,
                // which contains them, so nothing is emitted twice.
                buffer.setLength(start);
                buffer.append(element.toString());
            }
        });
        nd.traverse(document.body());
        return buffer.toString();
    }
}
这种写法不算特别优雅(尤其是 buffer.append("<body>"); 这一行)……但写起来很快 :)
另请参阅此答案以获取相关示例。