-2

尝试解析 XML 文档时出现异常。

我经历了很多像这里这里这样的帖子。但我的问题仍然没有得到解决。我检查了标题中也没有任何空格。我用记事本创建了它,并在保存时选择了编码为 utf-8。

我的 XML 文件看起来像这样

<?xml version="1.0" encoding="UTF-8"?>
<poem>
<title>Roses are Red</title>
<l>Roses are red</l>
</poem>

我正在使用 java 加载文件并解析它。我的java代码是

File xml = new File("d:\\uploads\test.xml");
try{  
     XMLReader xr = XMLReaderFactory.createXMLReader();
     MySAXApp handler = new MySAXApp();
     xr.setContentHandler(handler);
     xr.setErrorHandler(handler);
     FileReader r = new FileReader(xml);
     xr.parse(new InputSource(r));
 }
 catch(Exception e)
 {
      log.info("Exception : "+e.getMessage());
 }

我的 MySAXApp 课程如下

package utility;
import java.io.FileReader;
import java.util.logging.Logger;
import org.xml.sax.XMLReader;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.helpers.XMLReaderFactory;
import org.xml.sax.helpers.DefaultHandler;

public class MySAXApp extends DefaultHandler {

    public Logger log;
    public MySAXApp ()
    {

        super();
        log = Logger.getAnonymousLogger();
    }
    public void startDocument ()
    {
        log.info("Start document");
    }


    public void endDocument ()
    {
        log.info("End document");
    }
    public void startElement (String uri, String name,String qName, Attributes atts)
    {

      log.info("Start element: " + qName);

    }

    public void endElement (String uri, String name, String qName)
    {
         log.info("End element: " + qName);
    }
    public void characters (char ch[], int start, int length)
    {
        log.info("values:    \"");
        for (int i = start; i < start + length; i++) {
            switch (ch[i]) {
            case '\\':
            log.info("\\\\");
            break;
            case '"':
            log.info("\\\"");
            break;
            case '\n':
            log.info("\\n");
            break;
            case '\r':
            log.info("\\r");
            break;
            case '\t':
            log.info("\\t");
            break;
            default:
            log.info(ch[i]+"");
            break;
            }
        }
        log.info("\"\n");
    }
}

堆栈跟踪

org.xml.sax.SAXParseException: Content is not allowed in prolog.
    at com.sun.org.apache.xerces.internal.util.ErrorHandlerWrapper.createSAXParseException(ErrorHandlerWrapper.java:195)

    at com.sun.org.apache.xerces.internal.util.ErrorHandlerWrapper.fatalError(ErrorHandlerWrapper.java:174)
    at com.sun.org.apache.xerces.internal.impl.XMLErrorReporter.reportError(XMLErrorReporter.java:388)
    at com.sun.org.apache.xerces.internal.impl.XMLScanner.reportFatalError(XMLScanner.java:1411)
    at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$PrologDriver.next(XMLDocumentScannerImpl.java:1038)
    at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:648)
    at com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:140)

    at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:510)
    at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:807)
    at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:737)
    at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:107)
    at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1205)

    at utility.PerformOperation.startIndexing(PerformOperation.java:91)
    at utility.Upload.doPost(Upload.java:126)
    at javax.servlet.http.HttpServlet.service(HttpServlet.java:647)
    at javax.servlet.http.HttpServlet.service(HttpServlet.java:728)
    at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:305)
    at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:210)

    at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:222)
    at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:123)
    at org.apache.catalina.authenticator.AuthenticatorBase.invoke(AuthenticatorBase.java:472)
    at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:171)
    at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:99)
    at org.apache.catalina.valves.AccessLogValve.invoke(AccessLogValve.java:953)
    at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:118)
    at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:408)
    at org.apache.coyote.http11.AbstractHttp11Processor.process(AbstractHttp11Processor.java:1023)
    at org.apache.coyote.AbstractProtocol$AbstractConnectionHandler.process(AbstractProtocol.java:589)
    at org.apache.tomcat.util.net.JIoEndpoint$SocketProcessor.run(JIoEndpoint.java:312)
    at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
    at java.lang.Thread.run(Thread.java:619)
4

2 回答 2

6

@MikeSokolov 在您的第一个链接中对此进行了解释。引用:

经常发生的另一件事是 UTF-8 BOM(字节顺序标记),如果将文档作为字符流而不是字节流传递给 XML 解析器,则在 XML 声明可以被视为空格之前允许使用它.

FileReader将文件作为字符流读取,而要将文件作为字节流读取,则应FileInputStream改为使用,如下所示:

FileInputStream is = new FileInputStream(xml);
xr.parse(new InputSource(is));

如果您在十六进制编辑器中检查您的文本文件,您将在开始时看到 UTF-8 BOM,(EF BB BF)这就是导致使用FileReader.

于 2013-09-06T13:54:49.463 回答
1

您可以使用以下方法将 XML 文件读取为 String,然后解析该 String。关键是在读取文件时指定编码。

public static void XMLtoString(File file) {

    String encoding = "";
    String str = "";

    try {
        // detect the encoding of the file
        CharsetDetector cd = new CharsetDetector().setText(new BufferedInputStream(new FileInputStream(file)));
        encoding = cd.detect().getName();

        // to avoid the BOM ("byte order mark") being added to the String, encoding is specified as a parameter
        str = FileUtils.readFileToString(file, encoding);
    }
    catch (IOException e) {
        System.err.println("Caught IOException: " + e.getMessage());
    }
}
于 2013-10-31T10:03:06.470 回答