2

我想删除同一个父节点下连续出现的重复节点,前提是这些重复节点的子节点也完全相同。

输入场景一:

<myroot>
    <nodeA id="a">
        <section id="i">  
            <item1 id="0" method="create"> 
                <somechild>a</somechild>
            </item1>

            <item1 id="1" method="create">
                <otherchild>a</otherchild>
            </item1>
        </section>        

        <section id="i">
            <item1 id="0" method="create"> <!-- second consecutive create, we remove this -->
                <somechild>a</somechild>
            </item1>
            <item1 id="0" method="create"> <!-- third consecutive create, but children have different value , so we don't remove this -->
                <somechild>bbb</somechild>
            </item1>
            <item1 id="3" method="create">
                <other>xx</other>    
            </item1>

            <item1 id="0" method="change"> 
                <otherchild>a</otherchild>
            </item1>
        </section>
    </nodeA>

    <nodeA id="b">
        <section id="i">  
            <item1 id="0" method="create"> 
                <somechild>a</somechild>
            </item1>

            <item1 id="1" method="create">
                <otherchild>a</otherchild>
            </item1>
        </section>        

        <section id="i">
            <item1 id="0" method="create"> <!-- second consecutive create, we remove this -->
                <somechild>a</somechild>
            </item1>
            <item1 id="0" method="create"> <!-- third consecutive create, but children have different value , so we don't remove this -->
                <somechild>bbb</somechild>
            </item1>
            <item1 id="3" method="create">
                <other>xx</other>    
            </item1>

            <item1 id="0" method="change"> 
                <otherchild>a</otherchild>
            </item1>
        </section>
    </nodeA>

    <nodeB id="b">
        <section id="i">  
            <item1 id="0" method="create"> 
                <somechild>a</somechild>
            </item1>

            <item1 id="1" method="create">
                <otherchild>a</otherchild>
            </item1>
        </section>        

        <section id="i">
            <item1 id="0" method="create"> <!-- second consecutive create, we remove this -->
                <somechild>a</somechild>
            </item1>
            <item1 id="0" method="create"> <!-- third consecutive create, but children have different value , so we don't remove this -->
                <somechild>bbb</somechild>
            </item1>
            <item1 id="3" method="create">
                <other>xx</other>    
            </item1>

            <item1 id="0" method="change"> 
                <otherchild>a</otherchild>
            </item1>
        </section>
    </nodeB>
</myroot>

我的结果:

<myroot>
    <nodeA id="a">
        <section id="i">
            <item1 id="0" method="create">
                <somechild>a</somechild>
            </item1>
            <item1 id="1" method="create">
                <otherchild>a</otherchild>
            </item1>
        </section>
        <section id="i">
            <item1 id="0" method="create">
                <somechild>bbb</somechild>
            </item1>
            <item1 id="3" method="create">
                <other>xx</other>
            </item1>
            <item1 id="0" method="change">
                <otherchild>a</otherchild>
            </item1>
        </section>
    </nodeA>
    <nodeA id="b">
        <section id="i"/>
        <section id="i"/>
    </nodeA>
    <nodeB id="b">
        <section id="i"/>
        <section id="i"/>
    </nodeB>
</myroot>

预期输出:

<myroot>
    <nodeA id="a">
        <section id="i">  
            <item1 id="0" method="create"> 
                <somechild>a</somechild>
            </item1>

            <item1 id="1" method="create">
                <otherchild>a</otherchild>
            </item1>
        </section>        

        <section id="i">
            <item1 id="0" method="create"> 
                <somechild>bbb</somechild>
            </item1>
            <item1 id="3" method="create">
                <other>xx</other>    
            </item1>

            <item1 id="0" method="change"> 
                <otherchild>a</otherchild>
            </item1>
        </section>
    </nodeA>

    <nodeA id="b">
        <section id="i">  
            <item1 id="0" method="create"> 
                <somechild>a</somechild>
            </item1>

            <item1 id="1" method="create">
                <otherchild>a</otherchild>
            </item1>
        </section>        

        <section id="i">
            <item1 id="0" method="create"> 
                <somechild>bbb</somechild>
            </item1>
            <item1 id="3" method="create">
                <other>xx</other>    
            </item1>

            <item1 id="0" method="change"> 
                <otherchild>a</otherchild>
            </item1>
        </section>
    </nodeA>

    <nodeB id="b">
        <section id="i">  
            <item1 id="0" method="create"> 
                <somechild>a</somechild>
            </item1>

            <item1 id="1" method="create">
                <otherchild>a</otherchild>
            </item1>
        </section>        

        <section id="i">
            <item1 id="0" method="create"> 
                <somechild>bbb</somechild>
            </item1>
            <item1 id="3" method="create">
                <other>xx</other>    
            </item1>

            <item1 id="0" method="change"> 
                <otherchild>a</otherchild>
            </item1>
        </section>
    </nodeB>
</myroot>

在上面的第一个场景中:只有第二个连续创建具有相同的孩子,而第三个连续的create方法有不同的孩子,这就是我们只删除第二个的原因。

第二个输入场景(更多变化):

<myroot>
    <nodeB id="a">
        <cell id="i">              
            <item2 id="1" method="create">
                <otherchild>a</otherchild>
            </item2>

            <item2 id="0" method="create"> 
                <otherchild>a</otherchild>
            </item2>

            <item2 id="1" method="modify">
                <otherchild>a</otherchild>
            </item2>        
        </cell>        

        <cell id="i">
            <item2 id="1" method="modify"> <!-- second consecutive modify, we remove this -->
                <otherchild>a</otherchild>
            </item2>

            <item2 id="1" method="modify"> <!-- third consecutive modify, BUT different chldren, we do NOT remove this -->
                <otherchild>a</otherchild>
                <somechild>aa</somechild>
            </item2>

            <item2 id="1" method="delete" /> 

            <item2 id="0" method="create"> 
                <somechild>bbb</somechild>
            </item2>

            <item2 id="1" method="delete" /> <!-- second consecutive delete, we remove this -->

            <item2 id="3" method="create">
                <other>xx</other>    
            </item2>

            <item2 id="1" method="delete" />  <!-- third consecutive delete, we remove this -->           
        </cell>
    </nodeB>
</myroot>

输出:

<myroot>
    <nodeB id="a">
        <cell id="i">              
            <item2 id="1" method="create">
                <otherchild>a</otherchild>
            </item2>

             <item2 id="0" method="create"> 
                <otherchild>a</otherchild>
            </item2>

            <item2 id="1" method="modify">
                <otherchild>a</otherchild>
            </item2>        
        </cell>        

        <cell id="i">
            <item2 id="1" method="modify">
                <otherchild>a</otherchild>
                <somechild>aa</somechild>
            </item2>

            <item2 id="1" method="delete" /> 

            <item2 id="0" method="create"> 
                <somechild>bbb</somechild>
            </item2>           
        </cell>
    </nodeB>
</myroot>

但它在下面这种情况下不起作用:

    <myroot>     
        <node1 id="a">
            <section id="i">
                <item1 id="0" method="start">
                    <somechild>a</somechild>
                </item1>
                <item1 id="0" method="start"> <!-- this one is successive from the previous so we eliminate -->
                    <somechild>a</somechild>
                </item1>
                <item1 id="0" method="stop"/>                
                <item1 id="0" method="start"> <!-- this will be treated as new starting point -->
                    <somechild>a</somechild>
                </item1>
            </section>  

            <section id="i">
                <item1 id="0" method="start"> <!-- this one is successive from the previous so we eliminate -->
                    <somechild>a</somechild>
                </item1>
            </section>                
        </node1>
    </myroot>

output:

    <myroot>     
        <node1 id="a">
            <section id="i">
                <item1 id="0" method="start">
                    <somechild>a</somechild>
                </item1>
                <item1 id="0" method="start"> <!-- this one is successive from the previous so we eliminate -->
                    <somechild>a</somechild>
                </item1>
                <item1 id="0" method="stop"/>                          
            </section>                  
            <section id="i"/>                              
        </node1>
    </myroot>

The correct output should be:

    <myroot>     
        <node1 id="a">
            <section id="i">
                <item1 id="0" method="start">
                    <somechild>a</somechild>
                </item1>                   
                <item1 id="0" method="stop"/>                
                <item1 id="0" method="start"> <!-- this will be treated as new starting point -->
                    <somechild>a</somechild>
                </item1>
            </section>                  
            <section id="i" />                   
        </node1>
    </myroot>

任何人都可以帮助我使用 XSLT 进行这个棘手的删除吗?非常感谢。

约翰

4

1 回答 1

5

这是一个 XSLT 2.0 示例样式表,应该可以完成这项工作,或者至少让您了解使用 deep-equal 可以如何提供帮助:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Copies the input unchanged, except that an element four levels deep
  (/root/node/section/item in the sample data) is dropped when an earlier
  item under the SAME second-level element is deep-equal to it.

  The comparison is deliberately scoped to the enclosing second-level
  element: comparing against every preceding element in the document also
  removes items that only repeat content found under a different top-level
  node, which empties the sections of later nodes.
-->
<xsl:stylesheet 
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  version="2.0">

  <!-- Drop whitespace-only text nodes so that indentation differences
       between two items do not make deep-equal() report them as different. -->
  <xsl:strip-space elements="*"/>
  <xsl:output indent="yes"/>

  <!-- Identity template: copy every attribute and node as it is. -->
  <xsl:template match="@* | node()">
    <xsl:copy>
      <xsl:apply-templates select="@* , node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Suppress a fourth-level element when one of the following is deep-equal to it:
         * an earlier sibling inside the same parent, or
         * a child of an earlier sibling of its parent
           (an item in an earlier section of the same second-level element).
       deep-equal() compares name, attributes and content, so items that differ
       in @id, @method or children are kept. -->
  <xsl:template match="/*/*/*/*[some $el in (preceding-sibling::* , ../preceding-sibling::*/*) satisfies deep-equal(., $el)]"/>

</xsl:stylesheet>

[编辑] 有了新的要求,仅靠 deep-equal 我没有办法解决这个问题,所以我写了一个新的样式表,它同时使用 for-each-group 和 deep-equal:

<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <!--
    Copies the input, keeping only the first of any set of deep-equal
    fourth-level elements that share @id and @method and whose parents share
    element name and @id, all within one second-level element.
  -->

  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Identity copy; also callable by name from the filtering template below. -->
  <xsl:template name="identity" match="node() | @*">
    <xsl:copy>
      <xsl:apply-templates select="@* , node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Second-level element: work out which grandchildren survive, then copy
       the element and hand that list down through a tunnel parameter. -->
  <xsl:template match="/*/*">
    <xsl:variable name="survivors" as="element()*">
      <!-- Outer grouping: children that share element name and @id. -->
      <xsl:for-each-group select="*" group-by="concat(node-name(.), '|', @id)">
        <!-- Inner grouping: their children that share @id and @method. -->
        <xsl:for-each-group select="current-group()/*" group-by="concat(@id, '|', @method)">
          <!-- Keep a member only if no member earlier in document order
               is deep-equal to it. -->
          <xsl:sequence
            select="current-group()
                      [not(some $earlier in current-group()
                           satisfies ($earlier &lt;&lt; . and deep-equal($earlier, .)))]"/>
        </xsl:for-each-group>
      </xsl:for-each-group>
    </xsl:variable>
    <xsl:copy>
      <xsl:apply-templates select="@*"/>
      <xsl:apply-templates select="node()">
        <xsl:with-param name="first-in-group" select="$survivors" tunnel="yes"/>
      </xsl:apply-templates>
    </xsl:copy>
  </xsl:template>

  <!-- Fourth-level element: emit it only when it is one of the survivors
       (node identity test via intersect); otherwise produce nothing. -->
  <xsl:template match="/*/*/*/*">
    <xsl:param name="first-in-group" tunnel="yes"/>
    <xsl:if test="exists($first-in-group intersect .)">
      <xsl:call-template name="identity"/>
    </xsl:if>
  </xsl:template>

</xsl:stylesheet>

使用 Saxon 9.4 运行该样式表,当应用于示例时

<myroot>
    <nodeA id="a">
        <section id="i">  
            <item1 id="0" method="create"> 
                <somechild>a</somechild>
            </item1>

            <item1 id="1" method="create">
                <otherchild>a</otherchild>
            </item1>
        </section>        

        <section id="i">
            <item1 id="0" method="create"> <!-- second consecutive create, we remove this -->
                <somechild>a</somechild>
            </item1>
            <item1 id="0" method="create"> <!-- third consecutive create, but children have different value , so we don't remove this -->
                <somechild>bbb</somechild>
            </item1>
            <item1 id="3" method="create">
                <other>xx</other>    
            </item1>

            <item1 id="0" method="change"> 
                <otherchild>a</otherchild>
            </item1>
        </section>
    </nodeA>

    <nodeA id="b">
        <section id="i">  
            <item1 id="0" method="create"> 
                <somechild>a</somechild>
            </item1>

            <item1 id="1" method="create">
                <otherchild>a</otherchild>
            </item1>
        </section>        

        <section id="i">
            <item1 id="0" method="create"> <!-- second consecutive create, we remove this -->
                <somechild>a</somechild>
            </item1>
            <item1 id="0" method="create"> <!-- third consecutive create, but children have different value , so we don't remove this -->
                <somechild>bbb</somechild>
            </item1>
            <item1 id="3" method="create">
                <other>xx</other>    
            </item1>

            <item1 id="0" method="change"> 
                <otherchild>a</otherchild>
            </item1>
        </section>
    </nodeA>

    <nodeB id="b">
        <section id="i">  
            <item1 id="0" method="create"> 
                <somechild>a</somechild>
            </item1>

            <item1 id="1" method="create">
                <otherchild>a</otherchild>
            </item1>
        </section>        

        <section id="i">
            <item1 id="0" method="create"> <!-- second consecutive create, we remove this -->
                <somechild>a</somechild>
            </item1>
            <item1 id="0" method="create"> <!-- third consecutive create, but children have different value , so we don't remove this -->
                <somechild>bbb</somechild>
            </item1>
            <item1 id="3" method="create">
                <other>xx</other>    
            </item1>

            <item1 id="0" method="change"> 
                <otherchild>a</otherchild>
            </item1>
        </section>
    </nodeB>
</myroot>

输出

<myroot>
   <nodeA id="a">
      <section id="i">
         <item1 id="0" method="create">
            <somechild>a</somechild>
         </item1>
         <item1 id="1" method="create">
            <otherchild>a</otherchild>
         </item1>
      </section>
      <section id="i">
         <item1 id="0" method="create"><!-- third consecutive create, but children have different value , so we don't remove this --><somechild>bbb</somechild>
         </item1>
         <item1 id="3" method="create">
            <other>xx</other>
         </item1>
         <item1 id="0" method="change">
            <otherchild>a</otherchild>
         </item1>
      </section>
   </nodeA>
   <nodeA id="b">
      <section id="i">
         <item1 id="0" method="create">
            <somechild>a</somechild>
         </item1>
         <item1 id="1" method="create">
            <otherchild>a</otherchild>
         </item1>
      </section>
      <section id="i">
         <item1 id="0" method="create"><!-- third consecutive create, but children have different value , so we don't remove this --><somechild>bbb</somechild>
         </item1>
         <item1 id="3" method="create">
            <other>xx</other>
         </item1>
         <item1 id="0" method="change">
            <otherchild>a</otherchild>
         </item1>
      </section>
   </nodeA>
   <nodeB id="b">
      <section id="i">
         <item1 id="0" method="create">
            <somechild>a</somechild>
         </item1>
         <item1 id="1" method="create">
            <otherchild>a</otherchild>
         </item1>
      </section>
      <section id="i">
         <item1 id="0" method="create"><!-- third consecutive create, but children have different value , so we don't remove this --><somechild>bbb</somechild>
         </item1>
         <item1 id="3" method="create">
            <other>xx</other>
         </item1>
         <item1 id="0" method="change">
            <otherchild>a</otherchild>
         </item1>
      </section>
   </nodeB>
</myroot>

所以希望所有你想被删除的东西都被删除了,你想保留的东西都被保留了。

于 2012-05-21T17:01:57.930 回答