我有一个如下示例的 XML,它最多可以包含 5000 行,但为了有点合理,我将其限制为 20 行。
<PMT NM="rnt-model">
<PV V="L11-L23-L3448-L42375_MODEL1" C="1"></PV>
<PV V="L11-L23-L3448-L448_MODEL2" C="1"></PV>
<PV V="L11-L23-L3448-L448_MODEL3" C="1"></PV>
<PV V="L11-L23-L3448-L448_MODEL4" C="1"></PV>
<PV V="L11-L23-L3448-L448_MODEL5" C="2"></PV>
<PV V="L11-L24-L319-L493_MODEL6" C="1"></PV>
<PV V="L11-L25-L3288-L41931_MODEL7" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL8" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL9" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL10" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL11" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL12" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL13" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL14" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL15" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL16" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL17" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL18" C="1"></PV>
<PV V="L110-L254-L3218-L41307_MODEL19" C="1"></PV>
<PV V="L110-L254-L3218-L41307_MODEL20" C="1"></PV>
</PMT>
XML 必须从这种伪平面格式转换为基于@V 的树结构。@V 需要首先根据下划线拆分为 2,然后在连字符上进行标记。
或者为了让它更明显,下面是预期的结果。
<root>
<n id="L11">
<n id="L23">
<n id="L3448">
<n id="L42375">
<n m="MODEL1" c="1"></n>
</n>
<n id="L448">
<n m="MODEL2" c="1"></n>
<n m="MODEL3" c="1"></n>
<n m="MODEL4" c="1"></n>
<n m="MODEL5" c="2"></n>
</n>
</n>
</n>
<!-- rest of rows below -->
我设法让它在 XSLT 下面工作,当行数很少时它工作得很好。但是,当使用真正的实时 XML 时,生成树需要很长时间,所以我想知道如何让事情变得更有效率。在 XSLT2 中执行此操作非常简单,但是对于我正在处理的项目,我坚持使用 1.0。
使用的 XSLT 代码(工作但效率不高)
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
<xsl:key name="level1" use="@L1" match="row"/>
<xsl:key name="level2" use="@L2" match="row"/>
<xsl:key name="level3" use="@L3" match="row"/>
<xsl:key name="level4" use="@L4" match="row"/>
<xsl:template match="//PMT[@NM='rnt-model']">
<root>
<!-- Step 1 : generate a table with all the tokenized attributes -->
<xsl:variable name="theTree">
<xsl:for-each select="PV">
<row>
<xsl:for-each select="@V">
<xsl:call-template name="tokenize_tree">
<xsl:with-param name="list" select="substring-before(.,'_')"/>
<xsl:with-param name="delimiter" select="'-'"/>
</xsl:call-template>
<xsl:attribute name="M"><xsl:value-of select="substring-after(.,'_')"/></xsl:attribute>
</xsl:for-each>
<xsl:attribute name="C"><xsl:value-of select="@C"/></xsl:attribute>
</row>
</xsl:for-each>
</xsl:variable>
<!-- Step 2 : Group all -->
<xsl:for-each select="$theTree//row[generate-id()=generate-id(key('level1',@L1)[1])]">
<xsl:variable name="theType" select="@L1"/>
<n id="{$theType}">
<xsl:for-each select="$theTree/row[@L1=$theType][generate-id()=generate-id(key('level2',@L2)[1])]">
<xsl:variable name="theCat" select="@L2"/>
<n id="{$theCat}">
<xsl:for-each select="$theTree/row[@L2=$theCat][generate-id()=generate-id(key('level3',@L3)[1])]">
<xsl:variable name="theSubCat" select="@L3"/>
<n id="{$theSubCat}">
<xsl:for-each select="$theTree/row[@L3=$theSubCat][generate-id()=generate-id(key('level4',@L4)[1])]">
<xsl:variable name="theSerie" select="@L4"/>
<n id="{$theSerie}">
<xsl:for-each select="$theTree/row[@L4=$theSerie]">
<n m="{@M}" c="{@C}"/>
</xsl:for-each>
</n>
</xsl:for-each>
</n>
</xsl:for-each>
</n>
</xsl:for-each>
</n>
</xsl:for-each>
</root>
</xsl:template>
<xsl:template name="tokenize_tree">
<!--passed template parameter -->
<xsl:param name="list"/>
<xsl:param name="delimiter"/>
<xsl:choose>
<xsl:when test="contains($list, $delimiter)">
<xsl:attribute name="{substring(substring-before($list,$delimiter),1,2)}"><xsl:value-of select="substring-before($list,$delimiter)"/></xsl:attribute>
<!-- get everything in front of the first delimiter -->
<xsl:call-template name="tokenize_tree">
<!-- store anything left in another variable -->
<xsl:with-param name="list" select="substring-after($list,$delimiter)"/>
<xsl:with-param name="delimiter" select="$delimiter"/>
</xsl:call-template>
</xsl:when>
<xsl:otherwise>
<xsl:choose>
<xsl:when test="$list = ''">
<xsl:text/>
</xsl:when>
<xsl:otherwise>
<xsl:attribute name="{substring($list,1,2)}"><xsl:value-of select="$list"/></xsl:attribute>
</xsl:otherwise>
</xsl:choose>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
</xsl:stylesheet>
关于如何提高效率以便更快地处理更大文件的任何想法?