25

我正在使用 Java 6。我的 pom 中有这个依赖项......

            <dependency>
                    <groupId>xerces</groupId>
                    <artifactId>xercesImpl</artifactId>
                    <version>2.10.0</version>
            </dependency>

我正在尝试用这一行解析 XHTML 文档

   <!--[if gte mso 9]><xml>  <w:WordDocument>   <w:View>Normal</w:View>   <w:Zoom>0</w:Zoom>   <w:TrackMoves/>   <w:TrackFormatting/>   <w:PunctuationKerning/>   <w:ValidateAgainstSchemas/>   <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>   <w:IgnoreMixedContent>false</w:IgnoreMixedContent>   <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>   <w:DoNotPromoteQF/>   <w:LidThemeOther>EN-US</w:LidThemeOther>   <w:LidThemeAsian>JA</w:LidThemeAsian>   <w:LidThemeComplexScript>X-NONE</w:LidThemeComplexScript>   <w:Compatibility>    <w:BreakWrappedTables/>    <w:SnapToGridInCell/>    <w:WrapTextWithPunct/>    <w:UseAsianBreakRules/>    <w:DontGrowAutofit/>    <w:SplitPgBreakAndParaMark/>    <w:EnableOpenTypeKerning/>    <w:DontFlipMirrorIndents/>    <w:OverrideTableStyleHps/>    <w:UseFELayout/>   </w:Compatibility>   <m:mathPr>    <m:mathFont m:val="Cambria Math"/>    <m:brkBin m:val="before"/>    <m:brkBinSub m:val="--"/>    <m:smallFrac m:val="off"/>    <m:dispDef/>    <m:lMargin m:val="0"/>    <m:rMargin m:val="0"/>    <m:defJc m:val="centerGroup"/>    <m:wrapIndent m:val="1440"/>    <m:intLim m:val="subSup"/>    <m:naryLim m:val="undOvr"/>   </m:mathPr></w:WordDocument> </xml><![endif]-->

使用此代码...

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(false);
    factory.setExpandEntityReferences(false);
    factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    final DocumentBuilder builder = factory.newDocumentBuilder();
    final InputSource s = new InputSource(new StringReader(str));
    org.w3c.dom.Document result = builder.parse(s);

但我的解析正在死亡,但出现以下异常......

[Fatal Error] :91:947: The string "--" is not permitted within comments.
org.xml.sax.SAXParseException: The string "--" is not permitted within comments.
    at org.apache.xerces.parsers.DOMParser.parse(Unknown Source)
    at org.apache.xerces.jaxp.DocumentBuilderImpl.parse(Unknown Source)
    at com.myco.myproject.util.XmlUtilities.getStringAsDocument(XmlUtilities.java:201)
    at com.myco.myproject.util.NetUtilities.getUrlAsDocument(NetUtilities.java:67)
    at com.myco.myproject.parsers.impl.ForesightEventsParser.getEventsFromElement(ForesightEventsParser.java:133)
    at com.myco.myproject.parsers.impl.ForesightEventsParser.parsePage(ForesightEventsParser.java:99)
    at com.myco.myproject.parsers.impl.ForesightEventsParser.getEvents(ForesightEventsParser.java:58)
    at com.myco.myproject.domain.EventFeed.refresh(EventFeed.java:87)
    at com.myco.myproject.domain.EventFeed.getEvents(EventFeed.java:72)
    at com.myco.myproject.parsers.impl.ForesightParserTest.testParser(ForesightParserTest.java:49)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
    at java.lang.reflect.Method.invoke(Method.java:597)
    at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:44)
    at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:15)
    at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:41)
    at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:20)
    at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28)
    at org.springframework.test.context.junit4.statements.RunBeforeTestMethodCallbacks.evaluate(RunBeforeTestMethodCallbacks.java:74)
    at org.springframework.test.context.junit4.statements.RunAfterTestMethodCallbacks.evaluate(RunAfterTestMethodCallbacks.java:83)
    at org.springframework.test.context.junit4.statements.SpringRepeat.evaluate(SpringRepeat.java:72)
    at org.springframework.test.context.junit4.SpringJUnit4ClassRunner.runChild(SpringJUnit4ClassRunner.java:231)
    at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:50)
    at org.junit.runners.ParentRunner$3.run(ParentRunner.java:193)
    at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:52)
    at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:191)
    at org.junit.runners.ParentRunner.access$000(ParentRunner.java:42)
    at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:184)
    at org.springframework.test.context.junit4.statements.RunBeforeTestClassCallbacks.evaluate(RunBeforeTestClassCallbacks.java:61)
    at org.springframework.test.context.junit4.statements.RunAfterTestClassCallbacks.evaluate(RunAfterTestClassCallbacks.java:71)
    at org.junit.runners.ParentRunner.run(ParentRunner.java:236)
    at org.springframework.test.context.junit4.SpringJUnit4ClassRunner.run(SpringJUnit4ClassRunner.java:174)
    at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:50)
    at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)
    at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:467)
    at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:683)
    at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:390)
    at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:197)

在不更改我的 XHTML 的情况下,任何人都知道如何成功解析此文档吗?

编辑根据给出的评论,我从原始问题中删除了“格式良好”一词。我仍然对如何在不更改我正在解析的文本(我无法控制)的情况下消除此异常非常感兴趣。出于这个问题的目的,您可以假设评论中的“--”是唯一违反“格式良好”一词的行为。

4

2 回答 2

22

根据定义

注释以“--”开头和结尾,并且不包含任何“--”。

所以不,您的 XHTML 格式不正确,因为您不能--在注释中的任何地方使用。可以用别的东西代替吗?或者可能在两者之间放置一个空格,如下所示:- -. 对于这个问题确实没有一个干净的解决方案,任何替代方案都涉及搞乱占位符、编码等。

于 2012-04-14T18:02:14.603 回答
2

您的文档必须至少有一个额外的“-”。可能你写过:

<!--- --> or
<!--- ---> or
<!-- ---> etc.
于 2019-11-19T14:06:35.140 回答