我有一本书的 XML 文件。主树有Body/Pagecolumn/Region/Paragraph/Line/Word级别。但是,我对Line级别不感兴趣。有没有办法在不破坏R 中使用 XML 包或任何其他包的Word级别的情况下融合Line级别?转换后,主树为Body/Pagecolumn/Region/Paragraph/Word
下面提供了 XML 数据的示例:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE DjVuXML>
<DjVuXML>
<BODY>
<OBJECT data="file://localhost//book1.djvu" height="1650" type="image/x.djvu" usemap="book1.djvu" width="1275">
<PARAM name="PAGE" value="book1_001.djvu"/>
<PARAM name="DPI" value="300"/>
<HIDDENTEXT>
<PAGECOLUMN>
<REGION>
<PARAGRAPH>
<LINE>
<WORD coords="1,2,3,4,5">Title</WORD>
</LINE>
</PARAGRAPH>
</REGION>
</PAGECOLUMN>
<PAGECOLUMN>
<REGION>
<PARAGRAPH>
<LINE>
<WORD coords="30,564,90,545,559">This</WORD>
<WORD coords="97,559,109,545,559">is</WORD>
<WORD coords="115,564,162,545,559">a</WORD>
</LINE>
</PARAGRAPH>
<PARAGRAPH>
<LINE>
<WORD coords="30,589,80,570,584">First</WORD>
<WORD coords="88,584,115,570,584">line</WORD>
<WORD coords="123,584,146,574,584">is</WORD>
</LINE>
<LINE>
<WORD coords="30,614,90,598,609">Second</WORD>
<WORD coords="97,609,143,595,609">line</WORD>
<WORD coords="148,614,168,595,609">is</WORD>
</LINE>
<LINE>
<WORD coords="30,640,56,626,640">Third</WORD>
<WORD coords="63,640,95,626,640">line</WORD>
<WORD coords="101,640,128,626,640">is</WORD>
</LINE>
</PARAGRAPH>
</REGION>
</PAGECOLUMN>
</HIDDENTEXT>
</OBJECT>
<MAP name="book1.djvu"/>
</BODY>
</DjVuXML>
谢谢。