我正在使用 javax.imageio 从 PNG 图像中提取元数据,提取过程本身工作正常。但是,用于获取实际元数据的 getAsTree 方法返回的节点树在打印出来后是无效的 XML,所以我不知道该如何解析这段 XML 来取得其中的某些元数据:
run:
Format name: javax_imageio_png_1.0
<javax_imageio_png_1.0>
<IHDR width="256" height="256" bitDepth="8" colorType="RGBAlpha" compressionMethod="deflate" filterMethod="adaptive" interlaceMethod="none"/>
<cHRM whitePointX="31269" whitePointY="32899" redX="63999" redY="33001" greenX="30000" greenY="60000" blueX="15000" blueY="5999"/>
<gAMA value="45454"/>
<iTXt>
<iTXtEntry keyword="XML:com.adobe.xmp" compressionFlag="FALSE" compressionMethod="0" languageTag="" translatedKeyword="" text="<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core 5.0-c061 64.140949, 2010/12/07-10:57:01 ">
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about=""
xmlns:xmp="http://ns.adobe.com/xap/1.0/"
xmlns:xmpMM="http://ns.adobe.com/xap/1.0/mm/"
xmlns:stEvt="http://ns.adobe.com/xap/1.0/sType/ResourceEvent#"
xmlns:lr="http://ns.adobe.com/lightroom/1.0/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmp:MetadataDate="2012-12-05T21:36:19+01:00"
xmpMM:InstanceID="xmp.iid:EF7F11740720681192B08F682498C71D"
xmpMM:DocumentID="xmp.did:FC7F11740720681192B0AE5890E66CAE"
xmpMM:OriginalDocumentID="xmp.did:FC7F11740720681192B0AE5890E66CAE">
<xmpMM:History>
<rdf:Seq>
<rdf:li
stEvt:action="saved"
stEvt:instanceID="xmp.iid:FC7F11740720681192B0AE5890E66CAE"
stEvt:when="2012-12-04T00:23:34+01:00"
stEvt:changed="/metadata"/>
<rdf:li
stEvt:action="saved"
stEvt:instanceID="xmp.iid:EF7F11740720681192B08F682498C71D"
stEvt:when="2012-12-05T21:36:19+01:00"
stEvt:changed="/metadata"/>
</rdf:Seq>
</xmpMM:History>
<lr:hierarchicalSubject>
<rdf:Bag>
<rdf:li>Component|Software</rdf:li>
<rdf:li>Places|Paris</rdf:li>
<rdf:li>Product|Christensen</rdf:li>
<rdf:li>Product|Simba</rdf:li>
</rdf:Bag>
</lr:hierarchicalSubject>
<dc:subject>
<rdf:Bag>
<rdf:li>Christensen</rdf:li>
<rdf:li>Paris</rdf:li>
<rdf:li>Simba</rdf:li>
<rdf:li>Software</rdf:li>
</rdf:Bag>
</dc:subject>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>
<?xpacket end="r"?>"/>
</iTXt>
<pHYs pixelsPerUnitXAxis="2835" pixelsPerUnitYAxis="2835" unitSpecifier="meter"/>
</javax_imageio_png_1.0>
Format name: javax_imageio_1.0
<javax_imageio_1.0>
<Chroma>
<ColorSpaceType name="RGB"/>
<NumChannels value="4"/>
<Gamma value="0.45453998"/>
<BlackIsZero value="TRUE"/>
</Chroma>
<Compression>
<CompressionTypeName value="deflate"/>
<Lossless value="TRUE"/>
<NumProgressiveScans value="1"/>
</Compression>
<Data>
<PlanarConfiguration value="PixelInterleaved"/>
<SampleFormat value="UnsignedIntegral"/>
<BitsPerSample value="8 8 8 8"/>
</Data>
<Dimension>
<PixelAspectRatio value="1.0"/>
<ImageOrientation value="Normal"/>
<HorizontalPixelSize value="0.35273367"/>
<VerticalPixelSize value="0.35273367"/>
</Dimension>
<Text>
<TextEntry keyword="XML:com.adobe.xmp" value="<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core 5.0-c061 64.140949, 2010/12/07-10:57:01 ">
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about=""
xmlns:xmp="http://ns.adobe.com/xap/1.0/"
xmlns:xmpMM="http://ns.adobe.com/xap/1.0/mm/"
xmlns:stEvt="http://ns.adobe.com/xap/1.0/sType/ResourceEvent#"
xmlns:lr="http://ns.adobe.com/lightroom/1.0/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmp:MetadataDate="2012-12-05T21:36:19+01:00"
xmpMM:InstanceID="xmp.iid:EF7F11740720681192B08F682498C71D"
xmpMM:DocumentID="xmp.did:FC7F11740720681192B0AE5890E66CAE"
xmpMM:OriginalDocumentID="xmp.did:FC7F11740720681192B0AE5890E66CAE">
<xmpMM:History>
<rdf:Seq>
<rdf:li
stEvt:action="saved"
stEvt:instanceID="xmp.iid:FC7F11740720681192B0AE5890E66CAE"
stEvt:when="2012-12-04T00:23:34+01:00"
stEvt:changed="/metadata"/>
<rdf:li
stEvt:action="saved"
stEvt:instanceID="xmp.iid:EF7F11740720681192B08F682498C71D"
stEvt:when="2012-12-05T21:36:19+01:00"
stEvt:changed="/metadata"/>
</rdf:Seq>
</xmpMM:History>
<lr:hierarchicalSubject>
<rdf:Bag>
<rdf:li>Component|Software</rdf:li>
<rdf:li>Places|Paris</rdf:li>
<rdf:li>Product|Christensen</rdf:li>
<rdf:li>Product|Simba</rdf:li>
</rdf:Bag>
</lr:hierarchicalSubject>
<dc:subject>
<rdf:Bag>
<rdf:li>Christensen</rdf:li>
<rdf:li>Paris</rdf:li>
<rdf:li>Simba</rdf:li>
<rdf:li>Software</rdf:li>
</rdf:Bag>
</dc:subject>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>
<?xpacket end="r"?>" language="" compression="none"/>
</Text>
<Transparency>
<Alpha value="nonpremultipled"/>
</Transparency>
</javax_imageio_1.0>
BUILD SUCCESSFUL (total time: 3 seconds)
无效的 XML 从 iTXtEntry 元素开始:该元素带有 xpacket 那一部分内容,并且看起来包含子元素,可它采用的却是自闭合标签的形式,而没有单独的结束标签。因此,当我尝试使用 DOM Document 和 XPath 解析它时,会收到一条错误消息,指出该元素的内容中不能包含“>”。
我已经在 DocumentBuilderFactory 上禁用了 DTD 验证,但这没有帮助。我感觉自己最后只能改用正则表达式来处理,但这似乎不是正确的做法。为什么 imageio 的 getAsTree 方法一开始就会给出无效的 XML?我应该怎么办?