这是示例 XML:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<check>
<val>
<Samsung>
<name value="galaxy" />
<name value="galaxy" />
<name value="galaxys" />
<id value="123" />
<id value="123" />
<cal>23</cal>
<cal>23</cal>
<name2 value="galaxy" />
</Samsung>
<htc>
<name value="galaxy" />
<name value="galaxy" />
<name value="galaxys" />
<id value="123" />
<id value="123" />
<name2 value="galaxy" />
</htc>
</val>
</check>
这是我编写的 java 代码,它找到重复的元素(包括它们的属性)并删除它们:
import java.io.File;
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class RecursiveNodeCheck {
/**
* @param args
*/
public static String parent;
public static void main(String[] args) {
// TODO Auto-generated method stub
String path="D:/vodafone/parse.xml";
try {
File file = new File(path);
DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance()
.newDocumentBuilder();
Document doc = dBuilder.parse(file);
System.out.println("Root element :" + doc.getDocumentElement().getNodeName());
parent=doc.getDocumentElement().getNodeName();
String name="//"+parent+"/*";
XPathExpression expr = XPathFactory.newInstance().newXPath()
.compile(name);
NodeList list = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
recursive(doc,list,path);
} catch (Exception e) {
System.out.println(e.getMessage());
}
}
private static void recursive(Document doc,NodeList list,String path) throws XPathExpressionException, TransformerException, SAXException, ParserConfigurationException, IOException {
// TODO Auto-generated method stub
for (int count = 0; count < list.getLength(); count++) {
Node tempNode = list.item(count);
if(!(tempNode.getNodeName().equalsIgnoreCase(parent) )){
if(!tempNode.hasChildNodes()&& tempNode.getNodeType() == Node.ELEMENT_NODE){
Node head= tempNode.getParentNode();
Node current=head.getFirstChild();
String exp=null;
while(current!=null ){
if (current.getNodeType() != Node.ELEMENT_NODE ){
current=current.getNextSibling();
}else{
if( current.hasAttributes() ){
String key = current.getAttributes().getNamedItem("value").getNodeValue();
String value="";
if(current.getNodeValue()!=null){
value = current.getNodeValue();
}
exp= "//"+head.getNodeName()+"/"+current.getNodeName()+"[@value='"+key+"']"+value;
DelElements(doc,exp,path);
current = current.getNextSibling();
}
else{
exp= "//"+head.getNodeName()+"/"+current.getNodeName()+"/text()";
DelElements(doc,exp,path);
current = current.getNextSibling();
}
}
}
}else{
recursive(doc,tempNode.getChildNodes(),path);
}}
if (tempNode.hasChildNodes()) {
// loop again if has child nodes
recursive(doc,tempNode.getChildNodes(),path);
}
System.out.println("Node Name =" + tempNode.getNodeName() + " [CLOSE]");
}
}
private static void DelElements(Document doc, String exp,String path) throws TransformerException, SAXException, ParserConfigurationException, IOException {
// TODO Auto-generated method stub
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
NodeList nodes = null;
try {
doc = factory.newDocumentBuilder().parse(new File(path));
XPathExpression expr = XPathFactory.newInstance().newXPath()
.compile(exp);
nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
} catch (XPathExpressionException e) {
e.printStackTrace();
}
for ( int i= nodes.getLength()-1;i>0; i--) {
System.out.println("."); //progress indicator
if (nodes.item(i).getNodeType() == Node.TEXT_NODE ){
nodes.item(i).getParentNode().removeChild( nodes.item(i));
}
else{
Element el = (Element) ( nodes.item(i));
el.getParentNode().removeChild(el);
}
}
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = transformerFactory.newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
DOMSource source = new DOMSource(doc);
StreamResult result = new StreamResult(new File(path));
transformer.transform(source, result);
// TODO Auto-generated method stub
}
}
}
这是我得到的输出:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<check>
<val>
<Samsung>
<name value="galaxy"/>
<name value="galaxys"/>
<id value="123"/>
<cal>23</cal>
**<cal/>**
<name2 value="galaxy"/>
</Samsung>
<htc>
<name value="galaxy"/>
<name value="galaxys"/>
<id value="123"/>
<name2 value="galaxy"/>
</htc>
</val>
</check>
在上面的输出 xml 中,<cal/>
标签仍然存在。我应该如何删除这个?