这是我承诺的代码,用于将任意范围的 DOM 主体包装到任意 html 标记中,以便于提取、移动、替换、复制/粘贴等操作。
2015 年 12 月 19 日更新:新增了一个 wrapRange() 方法重载,它接受可选的偏移量,可以在文本节点(TextNode)中间进行拆分;偏移量用于指定范围在起始文本节点或结束文本节点内部的起点或终点位置。这样就可以在 jsoup DOM 模型中进行任意的复制/粘贴/移动操作了。
RangeWrapper.java 模块:
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.parser.Tag;

import java.util.ArrayList;
import java.util.List;
* Created by greg on 12/18/2015.
public class RangeWrapper {
* Wrap the supplied HTML around the "range" from startEl to endEl.*
* @param startEl the first element to be included into the range
* @param endEl the last element to be included into the range
* @param html HTML to wrap around this element, e.g.
* {@code <span class="head"></span>}. Can be arbitrarily deep.
* @return the wrapping element
public static Element wrapRange(Node startEl, Node endEl, String html) {
if (startEl == endEl) { // special case
return (Element) startEl.wrap(html).parentNode();
int startDepth = NodeWalker.getNodeDepth(startEl);
int endDepth = NodeWalker.getNodeDepth(endEl);
int minDepth = getRangeMinDepth(startEl, endEl);
int n;
while (startDepth > minDepth) {
Element parent = (Element)startEl.parentNode();
if ((n = startEl.siblingIndex()) > 0) {
// splitting the parent
ArrayList<Node> children = new ArrayList<Node>(parent.childNodes());
Element parent2 = new Element(Tag.valueOf(parent.tagName()), parent.baseUri(), parent.attributes());
for (int i = n; i < children.size(); i++)
startEl = parent2;
} else {
startEl = parent;
while (endDepth > minDepth) {
Element parent = (Element)endEl.parentNode();
if ((n = endEl.siblingIndex()) < parent.children().size()-1) {
// splitting the parent
ArrayList<Node> children = new ArrayList<Node>(parent.childNodes());
Element parent2 = new Element(Tag.valueOf(parent.tagName()), parent.baseUri(), parent.attributes());
for (int i = 0; i <= n; i++)
endEl = parent2;
} else {
endEl = parent;
// Now startEl and endEl are on the same depth == minDepth.
// Wrap the range with our html string
Element range = (Element) startEl.wrap(html).parentNode();
Node nextToAppend;
do {
nextToAppend = range.nextSibling();
// If nextToAppend is null, something is really wrong...
// Commented out to let it crash and investigate,
// so far it did not happen.
//if (nextToAppend == null)
// break;
} while (nextToAppend != endEl);
return range;
* Wrap the supplied HTML around the "range" from startEl to endEl.*
* @param startEl the first element to be included into the range
* @param stOffset if startEl is TextNode, split at this offset
* and include only the tail. Otherwise ignored.
* @param endEl the last element to be included into the range
* @param endOffset if endEl is a Text node, split at this offset
* and include only the head. Otherwise ignored.
* @param html HTML to wrap around this element, e.g. {@code <span class="head"></span>}. Can be arbitrarily deep.
* @return the wrapping element
public static Element wrapRange(Node startEl, int stOffset, Node endEl, int endOffset, String html) {
if (stOffset > 0 && startEl instanceof TextNode) {
TextNode tn = (TextNode) startEl;
if (endOffset < tn.getWholeText().length()-1) {
startEl = tn.splitText(stOffset); // Splits tn and adds tail to DOM, returns tail
if (endOffset > 0 && endEl instanceof TextNode) {
TextNode tn = (TextNode) endEl;
if (endOffset < tn.getWholeText().length()-1) {
tn.splitText(stOffset); // Splits tn and adds tail to DOM, we take head == original endEl
return wrapRange(startEl, endEl, html);
* Calculate the depth of the range between the two given nodes, relative to body.
* The body has depth 0.
* @param startNode the first element to be included into the range
* @param endNode the last element to be included into the range
* @return minimum depth found in the range
public static int getRangeMinDepth(final Node startNode, final Node endNode) {
class DepthVisitor implements NodeWalker.NodeWalkVisitor {
private int _minDepth = Integer.MAX_VALUE;
public boolean head(Node node, int depth) {
if (depth < _minDepth)
_minDepth = depth;
return true;
public boolean tail(Node node, int depth) {return true;}
int getMinDepth() { return _minDepth; }
DepthVisitor visitor = new DepthVisitor();
NodeWalker nw = new NodeWalker(visitor);
nw.walk(startNode, endNode);
return visitor.getMinDepth();
...以及上面代码使用的 NodeWalker.java,改编自 jsoup 包中的 NodeTraversor 和 NodeVisitor 类:
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.NodeVisitor;
* Depth-first node traversor. Use to iterate through all nodes under and including the specified root node.
* <p>
* This implementation does not use recursion, so a deep DOM does not risk blowing the stack.
* </p>
public class NodeWalker {
private NodeWalkVisitor visitor;
* Create a new traversor.
* @param visitor a class implementing the {@link NodeVisitor} interface, to be called when visiting each node.
public NodeWalker(NodeWalkVisitor visitor) {
this.visitor = visitor;
* Start a depth-first traverse of the whole body and all of its descendants.
* @param startNode the arbitrary start point node point within body to traverse from.
* @param endNode the arbitrary end point node point within body where we stop traverse.
* Can be null, in which case we walk until the end of the body.
public void walk(Node startNode, Node endNode) {
Node node = startNode;
int depth = getNodeDepth(startNode); // let's calulate depth relative to body, body is depth 0
while (node != null) {
if (!visitor.head(node, depth))
if (node.childNodeSize() > 0) {
node = node.childNode(0);
} else {
while (node.nextSibling() == null && depth > 0) {
if (!visitor.tail(node, depth) || node == endNode)
node = node.parentNode();
if (!visitor.tail(node, depth) || node == endNode)
node = node.nextSibling();
// The walkBack() was not needed, but leaving it here, may be useful for something...
// /**
// * Start a depth-first backward traverse of the whole body and all of its descendants.
// * @param startNode the arbitrary start point node point within body to traverse from.
// * @param endNode the arbitrary end point node point within body where we stop traverse.
// * Can be null, in which case we walk until the end of the body.
// */
// public void walkBack(Node startNode, Node endNode) {
// Node node = startNode;
// int depth = getNodeDepth(startNode); // let's calulate depth relative to body, body is depth 0
// while (node != null) {
// if (!visitor.tail(node, depth))
// break;
// if (node.childNodeSize() > 0) {
// node = node.childNode(node.childNodeSize() - 1);
// depth++;
// } else {
// while (node.previousSibling() == null && depth > 0) {
// if (!visitor.head(node, depth) || node == endNode)
// return;
// node = node.parentNode();
// depth--;
// }
// if (!visitor.head(node, depth) || node == endNode)
// break;
// node = node.previousSibling();
// }
// }
// }
* Calculate the depth of the given node relative to body. The body has depth 0.
* @param givenNode the node within the body to calculate depth for.
* @return the depth of the givenNode
public static int getNodeDepth(Node givenNode) {
Node node = givenNode;
int depth = 0; // let's calulate depth relative to body, body is depth 0
if (!(node instanceof Element) || !"body".equals(((Element) node).tagName())) {
do {
node = (Element)node.parentNode();
} while (node != null && !"body".equals(((Element) node).tagName()));
return depth;
public interface NodeWalkVisitor {
* Callback for when a node is first visited.
* @param node the node being visited.
* @param depth the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node
* of that will have depth 1.
* @return true to continue walk, false to abort
boolean head(Node node, int depth);
* Callback for when a node is last visited, after all of its descendants have been visited.
* @param node the node being visited.
* @param depth the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node
* of that will have depth 1.
* @return true to continue walk, false to abort
boolean tail(Node node, int depth);