如果文档本身没有 DOCTYPE，我认为并没有什么合理的办法可以直接为它设置一个。可行的解决方案是写入一个假的 DOCTYPE，就像你已经做的那样。如果你使用的是 SAX，可以使用下面这个假的 InputStream 和假的 DefaultHandler 实现。（仅适用于 latin1 等单字节编码）
我知道这个解决方案也很丑陋，但它是唯一适用于大数据流的方案。
这是一些代码。
/**
 * Phases the DOCTYPE-injecting input stream moves through, listed in the
 * order in which they occur.
 */
private enum State {
    /** Looking for the leading {@code <?xml} of the XML declaration. */
    readXmlDec,

    /** Inside the XML declaration, waiting for its closing {@code '>'}. */
    readXmlDecEnd,

    /** Emitting the characters of the fake DOCTYPE declaration. */
    writeFakeDoctipe,

    /** Injection finished; the remaining source bytes are passed through. */
    writeEnd
}
/**
 * Wraps an XML byte stream and splices a fake DOCTYPE declaration in right
 * after the XML declaration ({@code <?xml ... ?>}).
 *
 * <p>Bytes are compared one-to-one against ASCII characters, so this only
 * works for single-byte, ASCII-compatible encodings such as latin1. If the
 * stream does not begin with {@code <?xml}, it is passed through unchanged
 * and no DOCTYPE is injected.
 */
private class MyInputStream extends InputStream {
    /** Byte sequence that must open the stream for the DOCTYPE to be injected. */
    private static final String XML_DECL_START = "<?xml";

    /** The wrapped source stream. */
    private final InputStream is;

    /** Text injected immediately after the XML declaration. */
    private final String doctype = "<!DOCTYPE register SYSTEM \"fake.dtd\">";

    /** Number of leading bytes that have matched {@link #XML_DECL_START} so far. */
    private int matched = 0;

    /** Index of the next {@link #doctype} character to emit. */
    private int pos = 0;

    private State state = State.readXmlDec;

    private MyInputStream(InputStream source) {
        is = source;
    }

    /**
     * Returns the next byte of the source, or the next character of the fake
     * DOCTYPE while it is being injected.
     *
     * @return the next byte, or {@code -1} once the source is exhausted
     * @throws IOException if reading from the source fails
     */
    @Override
    public int read() throws IOException {
        int bit;
        switch (state) {
            case readXmlDec:
                bit = is.read();
                if (bit == XML_DECL_START.charAt(matched)) {
                    if (++matched == XML_DECL_START.length()) {
                        state = State.readXmlDecEnd;
                    }
                } else {
                    // First mismatch (or EOF, -1): the prefix can never match
                    // any more, so stop tracking it and simply pass the rest
                    // of the stream through. This avoids buffering the whole
                    // document while waiting for a match that cannot happen.
                    state = State.writeEnd;
                }
                break;
            case readXmlDecEnd:
                bit = is.read();
                // Compare the int directly: -1 (EOF) is not a valid code
                // point and must not be converted to a char.
                if (bit == '>') {
                    state = State.writeFakeDoctipe;
                }
                break;
            case writeFakeDoctipe:
                bit = doctype.charAt(pos++);
                if (doctype.length() == pos) {
                    state = State.writeEnd;
                }
                break;
            default:
                bit = is.read();
                break;
        }
        return bit;
    }

    /**
     * Closes the wrapped source stream.
     *
     * @throws IOException if closing the source fails
     */
    @Override
    public void close() throws IOException {
        is.close();
    }
}
/**
 * SAX handler that answers external-entity lookups with the real DTD bundled
 * as the classpath resource {@code /register.dtd}.
 */
private static class MyHandler extends DefaultHandler {
    /**
     * Resolves the requested entity to the bundled {@code /register.dtd},
     * regardless of the public or system identifier that was asked for.
     *
     * @param publicId the public identifier of the entity (unused)
     * @param systemId the system identifier of the entity (only logged)
     * @return an input source reading the bundled DTD
     * @throws IOException if the DTD resource cannot be found
     * @throws SAXException declared by the overridden method; not thrown here
     */
    @Override
    public InputSource resolveEntity(String publicId, String systemId) throws IOException, SAXException {
        System.out.println("resolve "+ systemId);
        // Anchor the lookup to this class so the resource is searched where
        // the application's own classes live. Going through ClassLoader.class
        // would anchor it to a JDK bootstrap class instead.
        InputStream dtd = MyHandler.class.getResourceAsStream("/register.dtd");
        if (dtd == null) {
            // Fail with a clear message instead of handing the parser an
            // InputSource that has no stream behind it.
            throw new IOException("DTD resource /register.dtd not found on the classpath"
                    + " (requested systemId: " + systemId + ")");
        }
        return new InputSource(dtd);
    }
... // rest of code
}