2

我正在寻找动态修改一个非常大的 XML 文件的标签的最佳方法。

考虑以下输入 XML:

输入

<?xml version="1.0" encoding="UTF-8"?>
<rootTag>
   <dictionary>
      <name>field1</name>
      <address>field2</address>
      <gender>field3</gender>
      .
      .
      <postcode>field30</postcode>
   </dictionary>
   <records>
      <record>
         <field id="field1">John</field>
         <field id="field2">Svalbard</field>
         <field id="field3">M</field>
         .
         .
         <field id="field30">12345</field>
      </record>
      .
      .
      <record>
      .
      .
      </record>
   </records>
</rootTag>

XML 文件在顶部包含一个字典,其后是大量记录节点;记录中每个 <field> 的 id 属性都对应字典中的某一项。

我想将每个记录节点中的标签替换为字典中的相应值。因此,输出应如下所示:

输出

<?xml version="1.0" encoding="UTF-8"?>
<rootTag>
   <records>
      <record>
         <name>John</name>
         <address>Svalbard</address>
         <gender>M</gender>
         .
         .
         <postcode>12345</postcode>
      </record>
      .
      .
      <record>
      .
      .
      </record>
   </records>
</rootTag>

请记住,有大量的<record>节点,在 Java 中实现这种转换的最佳方法是什么?

请注意,我只想更改标签而不是属性。

4

6 回答 6

1

我同意 @PeterJaloveczki 的观点,XSLT 可能是最合适的方式。下面的样式表可以完成这项工作:

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Renames every <field id="..."> element to the name of the dictionary entry
    whose text content equals that id, drops the <dictionary> block from the
    output, and copies everything else unchanged.
-->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:fn="http://www.w3.org/2005/xpath-functions">
    <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>

    <!--
        Index the dictionary entries by their text content (the field id) so
        each <field> is resolved through a key lookup instead of re-scanning
        the dictionary for every field of every record.
    -->
    <xsl:key name="dictionaryEntry" match="/rootTag/dictionary/*" use="." />

    <!-- Identity transform: copy any node or attribute not handled below. -->
    <xsl:template match="node()|@*">
        <xsl:copy>
            <xsl:apply-templates select="node() | @*" />
        </xsl:copy>
    </xsl:template>

    <!-- The dictionary is only lookup data; it is not written to the output. -->
    <xsl:template match="dictionary" />

    <!-- Rename a field to its dictionary name; the id attribute is not copied. -->
    <xsl:template match="field">
        <!--
            [1] guards against two dictionary entries sharing one id, which
            would otherwise yield a space-separated, invalid element name.
            name() of an empty sequence is '', which triggers the fallback.
        -->
        <xsl:variable name="tagName" select="name(key('dictionaryEntry', @id)[1])" />

        <!-- Fall back to the original element name when the id is unknown. -->
        <xsl:element name="{if ($tagName != '') then $tagName else 'field'}">
            <xsl:apply-templates select="node() | @*[name() != 'id']" />
        </xsl:element>
    </xsl:template>

</xsl:stylesheet>

它在某些方面被简化了,因为 xml 示例也被简化了,但基本上它应该可以工作。

于 2013-08-06T11:13:52.423 回答
0

可能使用 XSLT 是您最好的选择。

于 2013-08-06T10:23:29.847 回答
0

我可能会使用 SAX XML 解析器,它可以确保您不会一次加载整个 DOM 树。

简而言之,您将首先填充一个字典,然后,对于每个标签,在您解析它们时,将其名称替换为字典包含的任何内容。

关于如何在 Java 中使用 SAX 进行解析的示例:http://docs.oracle.com/javase/tutorial/jaxp/sax/parsing.html

于 2013-08-06T10:30:38.297 回答
0

一种选择是使用StAX,它具有高性能,将xml作为流处理,无需将整个xml加载到内存中,使用方便。

于 2013-08-06T10:46:46.093 回答
0

SAX 解析器是一种可行的方法,因为它以流的方式解析 XML,而不是一次性读取整个文件。有关详细信息,请参见:http://docs.oracle.com/javase/tutorial/jaxp/sax/parsing.html

于 2013-08-06T11:03:19.453 回答
0

为什么不手动解析 XML?

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import junit.framework.Assert;

import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Demonstrates a line-oriented rewrite of {@code <field id="fieldN">} elements into
 * elements named after the matching entry of the document's {@code <dictionary>} block.
 *
 * <p>Only the small dictionary fragment is parsed with DOM; the records are processed
 * one line at a time with a regular expression. This relies on every field element
 * occupying exactly one line and containing text only.
 */
public class ReplaceTextInXmlTest
{
   /** Opening tag that marks the start of the dictionary fragment. */
   private static final String DICTIONARY_OPEN = "<dictionary>";

   /** Closing tag that marks the end of the dictionary fragment. */
   private static final String DICTIONARY_CLOSE = "</dictionary>";

   /** Prefix that precedes the numeric part of every field id. */
   private static final String FIELD_ID_PREFIX = "field";

   /**
    * Matches a line holding exactly one text-only field element.
    * Group 1: indentation plus {@code <}; group 2: numeric part of the id;
    * group 3: {@code >text</}; group 4: {@code >} plus trailing whitespace.
    */
   private static final Pattern FIELD_PATTERN =
      Pattern.compile("^(\\s*<)field id=\"field([0-9]+)\"(>[^<]*</)field(>\\s*)$");

   /**
    * Rewrites a sample document and compares the result with the expected text.
    * The dictionary block is kept in the output; only field elements are renamed.
    */
   @Test
   public void test(
   ) {
      try {

         final String inputXml =
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<rootTag>\n" +
            "   <dictionary>\n" +
            "      <name>field1</name>\n" +
            "      <address>field2</address>\n" +
            "      <gender>field3</gender>\n" +
            "   </dictionary>\n" +
            "   <records>\n" +
            "      <record>\n" +
            "         <field id=\"field1\">John</field>\n" +
            "         <field id=\"field2\">Svalbard</field>\n" +
            "         <field id=\"field3\">M</field>\n" +
            "      </record>\n" +
            "      <record>\n" +
            "         <field id=\"field1\">Fritz</field>\n" +
            "         <field id=\"field2\">Hamburg</field>\n" +
            "         <field id=\"field3\">M</field>\n" +
            "      </record>\n" +
            "   </records>\n" +
            "</rootTag>";

         final Map<Integer, String> mapping = readDictionary(inputXml);
         final String outputXml = renameFieldTags(inputXml, mapping);

         final String expectedXml =
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<rootTag>\n" +
            "   <dictionary>\n" +
            "      <name>field1</name>\n" +
            "      <address>field2</address>\n" +
            "      <gender>field3</gender>\n" +
            "   </dictionary>\n" +
            "   <records>\n" +
            "      <record>\n" +
            "         <name>John</name>\n" +
            "         <address>Svalbard</address>\n" +
            "         <gender>M</gender>\n" +
            "      </record>\n" +
            "      <record>\n" +
            "         <name>Fritz</name>\n" +
            "         <address>Hamburg</address>\n" +
            "         <gender>M</gender>\n" +
            "      </record>\n" +
            "   </records>\n" +
            "</rootTag>\n";
         Assert.assertEquals(expectedXml, outputXml);

      } catch (final Exception e) {
         // Keep the original exception as the cause so the stack trace is not lost.
         throw new AssertionError(e.getMessage(), e);
      }
   }

   /**
    * Extracts the {@code <dictionary>} fragment from the document text and parses it
    * into a lookup table.
    *
    * @param xml the complete document text
    * @return map from the numeric part of a field id to the replacement tag name
    * @throws IllegalArgumentException if the dictionary block is missing or an entry
    *         does not start with {@code field}
    * @throws NumberFormatException if the part after {@code field} is not an integer
    * @throws ParserConfigurationException if no DOM parser can be created
    * @throws SAXException if the dictionary fragment is not well-formed
    * @throws IOException if reading the fragment fails
    */
   private static Map<Integer, String> readDictionary(final String xml)
         throws ParserConfigurationException, SAXException, IOException {
      final int start = xml.indexOf(DICTIONARY_OPEN);
      final int closeAt = start < 0 ? -1 : xml.indexOf(DICTIONARY_CLOSE, start);
      if (closeAt < 0) {
         throw new IllegalArgumentException("No complete <dictionary> block found");
      }
      final String fragment = xml.substring(start, closeAt + DICTIONARY_CLOSE.length());

      final DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
      final Document dom;
      // The fragment has no XML declaration, so the parser reads it as UTF-8;
      // encode with the same charset instead of the platform default.
      try (ByteArrayInputStream is =
            new ByteArrayInputStream(fragment.getBytes(StandardCharsets.UTF_8))) {
         dom = builder.parse(is);
      }

      final Map<Integer, String> mapping = new HashMap<>();
      final Element root = dom.getDocumentElement();
      final NodeList nodes = root.getChildNodes();
      for (int i = 0, z = nodes.getLength(); i < z; ++i) {
         final Node node = nodes.item(i);
         if (node.getNodeType() != Node.ELEMENT_NODE) {
            continue; // skip the whitespace text nodes between entries
         }
         final String fieldId = node.getTextContent().trim();
         if (!fieldId.startsWith(FIELD_ID_PREFIX)) {
            throw new IllegalArgumentException(
               "Dictionary entry <" + node.getNodeName() + "> has unexpected id: " + fieldId);
         }
         mapping.put(
            Integer.valueOf(fieldId.substring(FIELD_ID_PREFIX.length())),
            node.getNodeName());
      }
      return mapping;
   }

   /**
    * Copies the document line by line, renaming each field element that has a
    * dictionary entry. Lines that do not match, or whose id is not in the dictionary,
    * are copied unchanged. Every output line is terminated with {@code '\n'}.
    *
    * @param xml the complete document text
    * @param mapping map from the numeric part of a field id to the replacement tag name
    * @return the rewritten document text
    * @throws IOException if reading from the in-memory text fails
    */
   private static String renameFieldTags(final String xml, final Map<Integer, String> mapping)
         throws IOException {
      final StringBuilder out = new StringBuilder(xml.length());
      try (BufferedReader reader = new BufferedReader(new StringReader(xml))) {
         String line;
         while ((line = reader.readLine()) != null) {
            final Matcher match = FIELD_PATTERN.matcher(line);
            final String tagName =
               match.find() ? mapping.get(Integer.valueOf(match.group(2))) : null;
            if (tagName != null) {
               out.append(match.group(1))
                  .append(tagName)
                  .append(match.group(3))
                  .append(tagName)
                  .append(match.group(4));
            } else {
               // Not a field line, or an id without a dictionary entry: keep as is
               // rather than emitting a literal "null" tag.
               out.append(line);
            }
            out.append('\n');
         }
      }
      return out.toString();
   }
}
于 2013-08-06T11:42:33.757 回答