2

我正在使用 Java 默认的 DocumentBuilder 解析一个不到 100 行的 XML 文档。解析该文档需要 35 毫秒,执行单个 XPath 表达式需要 15 毫秒。如何减少解析 XML 和执行 XPath 所花费的时间?

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;


/**
 * Loads a configuration document and collects, per {@code <file>} element, the attribute
 * maps of its {@code <property>} descendants.
 *
 * <p>The document is parsed once in the constructor through {@code XMLUtil.getDocument}; when
 * that call returns {@code null} (it logs and returns {@code null} on parse or I/O errors)
 * this parser holds no document and {@link #getConfiguration(String)} returns {@code null}.
 */
public class XMLParser {


    public static final Logger LOGGER = Logger.getLogger(XMLParser.class.getName());

    /** Property attribute maps, keyed by {@code path + ":" + type} of the owning file element. */
    private final Map<String,List<NamedNodeMap>> fileVsProperties = new HashMap<String, List<NamedNodeMap>>();

    /** The parsed document, or {@code null} when parsing failed. */
    private final Document document;

    /**
     * Parses the given file into the document this parser works on.
     *
     * @param file the XML file to parse
     */
    public XMLParser(File file){
            this.document = XMLUtil.getDocument(file);
    }

    /**
     * Records the attributes of every {@code property} descendant of the given file element
     * under the key {@code path + ":" + type}. Elements without a {@code path} attribute are
     * logged and skipped.
     *
     * @param file a {@code <file>} element carrying {@code path} and {@code type} attributes
     */
    public void setProperties(Element file){
        NodeList properties = file.getElementsByTagName("property");
        List<NamedNodeMap> props = new ArrayList<NamedNodeMap>();
        String type = file.getAttribute("type");
        String path = file.getAttribute("path");

        // Element.getAttribute returns "" for a missing attribute, never null.
        if("".equals(path)){
            LOGGER.log(Level.INFO,"Attribute path is required for a file.");
            return;
        }

        path = path+":"+type;

        for(int i = 0;i<properties.getLength();i++){
            Element property = (Element) properties.item(i);
            props.add(property.getAttributes());
        }
        setProperties(props,path);
    }

    /**
     * Appends {@code properties} to the list already stored under {@code path}, or stores the
     * given list itself when the key is new.
     */
    private void setProperties(List<NamedNodeMap> properties , String path){
        List<NamedNodeMap>  previousValue = fileVsProperties.get(path);
        if(previousValue != null){
            previousValue.addAll(properties);
        }else{
            fileVsProperties.put(path,properties);
        }

    }

    /**
     * Looks up the {@code configuration} element whose {@code name} attribute equals
     * {@code branchName}.
     *
     * @param branchName the configuration name to search for; it is quoted as an XPath string
     *                   literal, so it may contain apostrophes and double quotes
     * @return the matching element, or {@code null} when there is no match or no document
     *         could be parsed
     * @throws XPathExpressionException if the expression cannot be evaluated
     */
    public Element getConfiguration(String branchName) throws XPathExpressionException{
        if(document == null){
            // Parsing failed earlier (already logged by XMLUtil); there is nothing to search.
            return null;
        }
        String expression = "/configurations/configuration[@name="+toXPathLiteral(String.valueOf(branchName))+"]";
        return (Element)XMLUtil.getElements(expression,document.getDocumentElement(),XPathConstants.NODE);
    }

    /**
     * Renders {@code value} as an XPath 1.0 string literal. XPath 1.0 has no escape
     * sequences, so the quote character is chosen to avoid the one contained in the value,
     * and a value containing both kinds is assembled with {@code concat()}.
     */
    private static String toXPathLiteral(String value){
        if(value.indexOf('\'') < 0){
            return "'" + value + "'";
        }
        if(value.indexOf('"') < 0){
            return "\"" + value + "\"";
        }
        StringBuilder literal = new StringBuilder("concat(");
        // Limit -1 keeps trailing empty segments so every apostrophe is re-emitted.
        String[] segments = value.split("'", -1);
        for(int i = 0;i<segments.length;i++){
            if(i > 0){
                literal.append(",\"'\",");
            }
            literal.append('\'').append(segments[i]).append('\'');
        }
        return literal.append(')').toString();
    }

    /**
     * Times parsing of {@code install.xml}, one XPath lookup, and property collection, and
     * prints the elapsed wall-clock milliseconds of each step.
     *
     * <p>NOTE(review): the first measurement starts on entry to {@code main}, so it also
     * covers whatever one-time initialization the XML classes perform on first use.
     */
    public static void main(String[] args) throws XPathExpressionException {
        String branchName = "BHARATHIKANNAN";
        long start = System.currentTimeMillis();
        File doc = new File("install.xml");
        XMLParser parser = new XMLParser(doc);
        long end = System.currentTimeMillis();
        System.out.println("Time Taken For Parsing :: "+ (end-start) + " milliseconds");
        start = end;
        Element configuration = parser.getConfiguration(branchName);
        end = System.currentTimeMillis();
        System.out.println("Time Taken For XPATH Expression TO Finding the Configuration :: "+ (end-start) + " milliseconds");
        if(configuration == null){
            // Without this guard getFiles would fail with a NullPointerException.
            System.err.println("No configuration named " + branchName + " found in " + doc);
            return;
        }
        start = end;
        NodeList files = parser.getFiles(configuration);
        for(int i=0;i<files.getLength();i++){
            parser.setProperties((Element) files.item(i));
        }
        end = System.currentTimeMillis();
        System.out.println(parser.fileVsProperties);
        System.out.println("Time Taken For Setting Properties :: "+ (end-start) + " milliseconds");
    }

    /**
     * Returns every {@code file} element below the given configuration element.
     *
     * @param configuration the configuration element to search
     * @return the descendant {@code file} elements
     */
    public NodeList getFiles(Element configuration){
        return configuration.getElementsByTagName("file");
    }

}


/**
 * Static helpers that parse XML files into DOM documents and evaluate XPath expressions.
 *
 * <p>One {@link DocumentBuilder} and one {@link XPath} are created when the class is loaded
 * and reused by every call.
 * NOTE(review): JAXP does not document these objects as thread-safe, so concurrent use of
 * this class presumably needs external synchronization - confirm before sharing it.
 */
class XMLUtil{
    // Declared first so it is initialized before newBuilder() may need to log.
    public static final Logger LOGGER = Logger.getLogger(XMLUtil.class.getName());

    private static final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    private static final XPathFactory xpathFactory = XPathFactory.newInstance();

    /** Shared builder, or {@code null} when the factory could not create one. */
    private static final DocumentBuilder builder = newBuilder();

    // Created independently of the builder so a parser configuration problem does not also
    // leave XPath evaluation unusable.
    private static final XPath xpath = xpathFactory.newXPath();

    /** Creates the shared builder; logs the failure with its cause and returns {@code null}. */
    private static DocumentBuilder newBuilder(){
        try {
            return factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            LOGGER.log(Level.SEVERE,"Unable to create a DocumentBuilder; XML parsing is unavailable",e);
            return null;
        }
    }

    /**
     * Parses the given file into a DOM document.
     *
     * @param f the XML file to parse
     * @return the parsed document, or {@code null} when the file cannot be read or is not
     *         well-formed (the failure is logged)
     * @throws IllegalStateException if no {@link DocumentBuilder} could be created when this
     *         class was loaded
     */
    public static Document getDocument(File f){
        if(builder == null){
            throw new IllegalStateException("No DocumentBuilder available; see the configuration error logged earlier");
        }
        Document doc = null;
        try {
            doc = builder.parse(f);
        } catch (SAXException e) {
            LOGGER.log(Level.WARNING,"Invalid XML Document ",e);
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE,"No Document Found in the given path",e);
        }
        return doc;
    }

    /**
     * Evaluates an XPath expression with {@code ele} as the context node.
     *
     * @param xpathExpression the expression to evaluate
     * @param ele             the context node for the evaluation
     * @param dataType        the desired result type, one of the {@code XPathConstants} values
     * @return the evaluation result, typed according to {@code dataType}
     * @throws XPathExpressionException if the expression cannot be evaluated
     */
    public static Object getElements(String xpathExpression , Element ele ,QName dataType) throws XPathExpressionException{
        return xpath.evaluate(xpathExpression, ele,dataType);
    }


}

XML 文件

    <?xml version="1.0"?>
<!--
        Note : Default configuration loaded using your current branch name . You can extend configurations using extend attribute in configuration
        node . 
-->
<configurations>
        <configuration name="default">
                <files>
                        <file type="xml" path="conf/server.xml.orig">
                                <property regex="(port=).*" replace="\18080" xpath="/Server/Connector"></property>
                                <property regex="(port=).*" replace="\18080"></property>
                        </file>
                        <file type="text" path="conf/system_properties.conf">
                                <property regex="(username=).*" replace="\1root" ></property>
                        </file>
                </files>
        </configuration>
        <configuration name="BHARATHIKANNAN" extends="default">
                <files>
                        <file type="text" path="conf/system_properties.conf">
                                <property regex="(username=).*" replace="\1root" ></property>
                        </file>
                </files>
        </configuration>
</configurations>

输出 :

Time Taken For Parsing :: 24 milliseconds
Time Taken For XPATH Expression TO Finding the Configuration :: 14 milliseconds
{conf/system_properties.conf:text=[com.sun.org.apache.xerces.internal.dom.AttributeMap@75d9fd51]}
Time Taken For Setting Properties :: 0 milliseconds
4

1 回答 1

0

最近有人问了一个非常相似的问题,但文件更大(2 MB),我在那里给出了 Saxon 的一些计时数据:

https://stackoverflow.com/questions/12497928/xpath-speed-comparision/12508614#12508614

尽管文档大得多,那些耗时仍比您看到的短得多。由于您已经在使用 Java,切换到 Saxon 应该非常简单。

不过需要注意的是,您一进入 main() 就开始计时,这意味着您测量的主要是类加载时间,而不是 XML 处理时间。我的测量在开始计时之前先对 Java 虚拟机进行了预热。

另请注意,如果使用 Saxon,最好使用 Saxon 的原生树模型(native tree),而不是 DOM 或其他替代方案。我们最近在这里发布了一些测量结果:

http://dev.saxonica.com/blog/mike/2012/09/index.html#000194

DOM 平均比 Saxon 原生树慢 8 倍,最坏情况下慢 23 倍。

于 2012-09-21T13:41:38.367 回答