2

我想解析一个 XML 文档,并让它的 DOM 表示尽可能贴近源文档,尤其是希望保留 ENTITY_REFERENCE 节点。但实际得到的是一个 ENTITY_REFERENCE 节点,后面还跟着一个表示该实体引用展开内容的 TEXT_NODE。

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSOutput;
import org.w3c.dom.ls.LSSerializer;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Minimal reproduction: parses a small XML document with entity-reference
 * expansion disabled and serializes the resulting DOM to standard output
 * through the DOM Level 3 Load/Save API.
 */
public class Main {

    /**
     * Parses the inline sample document and writes its serialized form to
     * {@code System.out}.
     *
     * @param args ignored
     * @throws Exception if the parser cannot be configured or the document
     *                   cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // Ask for a parser that keeps entity references as nodes instead of
        // replacing them with their expansion.
        factory.setExpandEntityReferences(false);
        DocumentBuilder builder = factory.newDocumentBuilder();

        // The internal DTD subset declares the entity "a"; the root element
        // references it once.
        final String xml =
                "<?xml version=\"1.0\"?>" +
                "<!DOCTYPE simple SYSTEM \"simple.dtd\" [" +
                "<!ENTITY a \"abhijeet\">" +
                "]>" +
                "<simple> &a;   </simple>";

        // Resolve every external entity (here: simple.dtd) to an empty
        // stream so the parser reads nothing for it.
        builder.setEntityResolver(new EntityResolver() {

            @Override
            public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
                return new InputSource(new StringReader(""));
            }
        });
        Document document = builder.parse(new InputSource(new StringReader(xml)));

        final DOMImplementationLS domImplementationLS = (DOMImplementationLS) builder.getDOMImplementation();
        LSSerializer serializer = domImplementationLS.createLSSerializer();
        LSOutput output = domImplementationLS.createLSOutput();
        PrintWriter stdout = new PrintWriter(System.out);
        output.setCharacterStream(stdout);
        serializer.write(document, output);
        // Flush explicitly: the PrintWriter buffers its output, and this code
        // should not rely on the serializer flushing a caller-supplied stream.
        // System.out is deliberately left open.
        stdout.flush();
    }

}

如果你想直接运行这段代码,可以在这里运行:http://ideone.com/Rldi2S

在以下 HotSpot 运行环境中:Java(TM) SE Runtime Environment (build 1.7.0_15-b03)、Java(TM) SE Runtime Environment (build 1.7.0_17-b02)、Java(TM) SE Runtime Environment (build 1.6.0_26-b03)

得到的结果都是一样的:

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE simple SYSTEM "simple.dtd" [<!ENTITY a 'abhijeet'>
]>
<simple> &a;abhijeet   </simple>

其中“&a;”是实体引用节点,紧随其后的“abhijeet”是该实体展开内容对应的文本节点。

我的预期是:

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE simple SYSTEM "simple.dtd" [<!ENTITY a 'abhijeet'>
]>
<simple> &a;   </simple>

这是因为我对相关知识了解不够、代码中有错误,还是解析器本身有 bug?

4

0 回答 0