2

The Input I Have:

I'm working with a SharePoint list that produces RSS feeds in the following form:

<?xml version="1.0"?>
<rss>
  <channel>
    <!-- Irrelevant Fields -->
    <item>
      <title type="text">Title</title>
      <description type="html">
        &lt;div&gt;&lt;b&gt;Field1:&lt;/b&gt; Value 1&lt;/div&gt;
        &lt;div&gt;&lt;b&gt;Field2:&lt;/b&gt; Value 2&lt;/div&gt;
        &lt;div&gt;&lt;b&gt;Field3:&lt;/b&gt; Value 3&lt;/div&gt;
        &lt;div&gt;&lt;b&gt;Field4:&lt;/b&gt; Value 4&lt;/div&gt;
        &lt;div&gt;&lt;b&gt;Field5:&lt;/b&gt; Value 5&lt;/div&gt;
      </description>
    </item>
    <item>
      <title type="text">Title</title>
      <description type="html">
        &lt;div&gt;&lt;b&gt;Field1:&lt;/b&gt; Value 1&lt;/div&gt;
        &lt;div&gt;&lt;b&gt;Field3:&lt;/b&gt; Value 3&lt;/div&gt;
        &lt;div&gt;&lt;b&gt;Field4:&lt;/b&gt; Value 4&lt;/div&gt;
        &lt;div&gt;&lt;b&gt;Field5:&lt;/b&gt; Value 5&lt;/div&gt;
      </description>
    </item>
    <item>
      <title type="text">Title</title>
      <description type="html">
        &lt;div&gt;&lt;b&gt;Field1:&lt;/b&gt; Value 1&lt;/div&gt;
        &lt;div&gt;&lt;b&gt;Field2:&lt;/b&gt; Value 2&lt;/div&gt;
        &lt;div&gt;&lt;b&gt;Field3:&lt;/b&gt; Value 3&lt;/div&gt;
        &lt;div&gt;&lt;b&gt;Field4:&lt;/b&gt; Value 4&lt;/div&gt;
        &lt;div&gt;&lt;b&gt;Field5:&lt;/b&gt; Value 5&lt;/div&gt;
      </description>
    </item>
    <!-- More <item> elements -->
  </channel>
</rss>

Note that the <description> element seems to define a set of elements. Furthermore, note that not all <description> elements contain markup for "Field2".

What I Need:

I need XML of the following form:

<?xml version="1.0"?>
<Events>
  <Event>
    <Category>Title</Category>
    <Field1>Value 1</Field1>
    <Field2>Value 2</Field2>
    <Field3>Value 3</Field3>
    <Field4>Value 4</Field4>
    <Field5>Value 5</Field5>
  </Event>
  <Event>
    <Category>Title</Category>
    <Field1>Value 1</Field1>
    <Field2/>
    <Field3>Value 3</Field3>
    <Field4>Value 4</Field4>
    <Field5>Value 5</Field5>
  </Event>
  <Event>
    <Category>Title</Category>
    <Field1>Value 1</Field1>
    <Field2>Value 2</Field2>
    <Field3>Value 3</Field3>
    <Field4>Value 4</Field4>
    <Field5>Value 5</Field5>
  </Event>
</Events>

The Rules (updated):

  1. This needs to be an XSLT 1.0 solution.
  2. xxx:node-set is the only extension function available to me; in particular, I cannot use extension functions written in other languages, such as C# or JavaScript.
  3. If any field's information is missing, a blank element should be output. Note in my desired output the empty <Field2> child within the second <Event> element.
  4. We cannot assume that the field names themselves will follow any particular pattern; they may as well be <PeanutButter>, <Jelly>, etc.

What I Have So Far:

<?xml version="1.0"?>
<xsl:stylesheet
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:exsl="http://exslt.org/common" 
  exclude-result-prefixes="exsl"
  version="1.0">
  <!-- Turns every <item> of the feed into an <Event>. The escaped HTML in
       <description> is split into one <token> per line, and each token of the
       form "<div><b>Name:</b> Value</div>" becomes <Name>Value</Name>.
       Uses the EXSLT node-set() extension to walk the tokenized fragment. -->
  <xsl:output method="xml" omit-xml-declaration="no" indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Document element: wrap the result in <Events> and process each <item>
       that is a grandchild of the document element. -->
  <xsl:template match="/*">
    <Events>
      <xsl:apply-templates select="*/item"/>
    </Events>
  </xsl:template>

  <!-- Items whose description mentions 'Field2': emit <Category> from the
       title, then one element per non-blank description line. -->
  <xsl:template match="item[contains(description, 'Field2')]">
    <Event>
      <!-- Result tree fragment holding one <token> per line of the
           description (the delimiter is the line-feed character). -->
      <xsl:variable name="vElements">
        <xsl:call-template name="tokenize">
          <xsl:with-param name="text" select="description"/>
          <xsl:with-param name="delimiter" select="'&#10;'"/>
        </xsl:call-template>
      </xsl:variable>

      <Category>
        <xsl:value-of select="title"/>
      </Category>
      <!-- The [normalize-space()] filter skips tokens that contain only
           whitespace. -->
      <xsl:apply-templates
        select="exsl:node-set($vElements)/*[normalize-space()]" mode="token"/>
    </Event>
  </xsl:template>

  <!-- NOTE: this template is nearly identical to the previous one; the only
       difference is the blank <Field2> appended after the parsed fields.
       That is not very elegant, and it puts <Field2/> last instead of in
       its natural position. -->
  <xsl:template match="item[not(contains(description, 'Field2'))]">
    <Event>
      <xsl:variable name="vElements">
        <xsl:call-template name="tokenize">
          <xsl:with-param name="text" select="description"/>
          <xsl:with-param name="delimiter" select="'&#10;'"/>
        </xsl:call-template>
      </xsl:variable>

      <Category>
        <xsl:value-of select="title"/>
      </Category>
      <xsl:apply-templates
        select="exsl:node-set($vElements)/*[normalize-space()]" mode="token"/>
      <Field2/>
    </Event>
  </xsl:template>

  <!-- One tokenized line. The element name is the text between
       "<div><b>" and the first ':' of the whitespace-normalized token;
       the element content is the text between ":</b> " and "</div>". -->
  <xsl:template match="*" mode="token">
    <xsl:element
      name="{substring-after(
               substring-before(normalize-space(), ':'), 
               '&lt;div&gt;&lt;b&gt;')}">
      <xsl:value-of
        select="substring-before(
                  substring-after(., ':&lt;/b&gt; '),
                  '&lt;/div&gt;')"/>
    </xsl:element>
  </xsl:template>

  <!-- Recursive string splitter.
       Params: text      - the string still to be split
               delimiter - separator string (defaults to a single space)
       Emits a <token> for the text before each delimiter, recurses on the
       remainder, and emits a final <token> for any non-empty tail. -->
  <xsl:template name="tokenize">
    <xsl:param name="text"/>
    <xsl:param name="delimiter" select="' '"/>
    <xsl:choose>
      <xsl:when test="contains($text,$delimiter)">
        <xsl:element name="token">
          <xsl:value-of select="substring-before($text,$delimiter)"/>
        </xsl:element>
        <xsl:call-template name="tokenize">
          <xsl:with-param
            name="text"
            select="substring-after($text,$delimiter)"/>
          <xsl:with-param
            name="delimiter"
            select="$delimiter"/>
        </xsl:call-template>
      </xsl:when>
      <xsl:when test="$text">
        <xsl:element name="token">
          <xsl:value-of select="$text"/>
        </xsl:element>
      </xsl:when>
    </xsl:choose>
  </xsl:template>
</xsl:stylesheet>

...which produces:

<?xml version="1.0"?>
<Events>
  <Event>
    <Category>Title</Category>
    <Field1>Value 1</Field1>
    <Field2>Value 2</Field2>
    <Field3>Value 3</Field3>
    <Field4>Value 4</Field4>
    <Field5>Value 5</Field5>
  </Event>
  <Event>
    <Category>Title</Category>
    <Field1>Value 1</Field1>
    <Field3>Value 3</Field3>
    <Field4>Value 4</Field4>
    <Field5>Value 5</Field5>
    <Field2/>
  </Event>
  <Event>
    <Category>Title</Category>
    <Field1>Value 1</Field1>
    <Field2>Value 2</Field2>
    <Field3>Value 3</Field3>
    <Field4>Value 4</Field4>
    <Field5>Value 5</Field5>
  </Event>
</Events>

There are two primary issues with my solution:

  1. It feels clunky; there's repetitive code and it seems a tad unwieldy. I'm thinking that some optimization could occur?
  2. Notice that it outputs empty <Field2> elements in the incorrect order and places them at the bottom. This is somewhat easily remedied, I suppose, but all of my solutions seem silly and are therefore not included. :)

Ready, Set, Go!

I would appreciate your help with a more elegant solution (or, at the least, a solution that fixes issue #2 above). Thanks!


Conclusion

Based on observations made by @Borodin in his own solution, I decided to go with the following:

<?xml version="1.0"?>
<xsl:stylesheet
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:exsl="http://exslt.org/common"
  exclude-result-prefixes="exsl"
  version="1.0">
  <!-- Converts each <item> of the feed into an <event> holding a <category>
       (the item title) followed by one element per entry of $vFieldNames,
       in the order listed there. A field that is absent from an item's
       description still produces an (empty) element. -->
  <xsl:output method="xml" omit-xml-declaration="no" indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Field map: oldName is the label as it appears in the description
       markup, newName is the element name to emit. Edit this list to add,
       remove, reorder or rename fields. -->
  <xsl:variable name="vFieldNames">
    <name oldName="Field1" newName="fieldA" />
    <name oldName="Field2" newName="fieldB" />
    <name oldName="Field3" newName="fieldC" />
    <name oldName="Field4" newName="fieldD" />
    <name oldName="Field5" newName="fieldE" />
  </xsl:variable>

  <!-- Document root: wrap the result in <events> and process every <item>
       two levels below the document element's parent (rss/channel/item in
       the sample feed). -->
  <xsl:template match="/">
    <events>
      <xsl:apply-templates select="*/*/item" />
    </events>
  </xsl:template>

  <!-- One feed item: emit the title, then hand the description text to the
       "name" template once per configured field. -->
  <xsl:template match="item">
    <event>
      <category>
        <xsl:value-of select="title" />
      </category>
      <xsl:apply-templates select="exsl:node-set($vFieldNames)/*">
        <!-- Evaluated with the <item> as context node, so this is the
             current item's description. -->
        <xsl:with-param name="pDescriptionText" select="description" />
      </xsl:apply-templates>
    </event>
  </xsl:template>

  <!-- One configured field.
       Param: pDescriptionText - the escaped-HTML description of the item.
       Emits an element named @newName whose content is the value that
       follows the "<b>oldName:</b>" label, up to the closing "</div>". -->
  <xsl:template match="name">
    <xsl:param name="pDescriptionText" />

    <!-- Anchor on the complete label markup rather than the bare name, so
         that "Field1" cannot match inside "Field10" or inside a value. -->
    <xsl:variable
      name="vLabel"
      select="concat('&lt;b&gt;', @oldName, ':&lt;/b&gt;')" />

    <!-- Stop at the closing tag rather than at the bare substring "div", so
         values such as "Individual" are not truncated. When the label is
         missing, substring-after() yields '' and so does this expression,
         which produces the required empty element. -->
    <xsl:variable
      name="vValue"
      select="substring-before(
                substring-after($pDescriptionText, $vLabel),
                '&lt;/div&gt;')" />

    <xsl:element name="{@newName}">
      <xsl:value-of select="normalize-space($vValue)" />
    </xsl:element>
  </xsl:template>

</xsl:stylesheet>

This solution adds one extra layer: it allows me to change the field names nicely (via the oldName and newName attributes on each <name> element).

Thanks to all who answered!

4

3 回答 3

4

您可能对此解决方案感兴趣。不过,我使用了从 Field1 到 Field5 的字面字段名称,并且由于您可以使用 node-set,我已将这些名称放入一个便于修改的变量中。

该代码分两遍处理 description 文本,以提取每个字段名称对应的值。第一遍选取字段名称之后、文本 div 之前的内容,得到 $rough,其结果类似于 :&lt;/b&gt; Value 1&lt;/(即 :</b> Value 1</)。第二遍进一步提炼,取 $rough 中 &gt; 之后、&lt; 之前的全部内容,得到 Value 1。最后,在 xsl:value-of 元素中使用 normalize-space 去掉这个最终值两端的空格。

如果在目标字符串中找不到定界符字符串,substring-before 会返回空字符串,因此 XSLT 本身就能处理缺失的 Field2(或任何其他字段)。

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:ext="http://exslt.org/common"
    exclude-result-prefixes="ext"
    version="1.0">

    <!-- Converts rss/channel/item elements into <Event> elements. Each event
         holds a <Category> (the item title) followed by one element per
         entry of $names, in the order listed there; a field missing from
         the description yields an empty element. -->

    <xsl:strip-space elements="*"/>
    <xsl:output method="xml" indent="yes"/>

    <!-- Field names to extract; each name is both the label searched for in
         the description and the name of the element that is emitted. -->
    <xsl:variable name="names">
        <name>Field1</name>
        <name>Field2</name>
        <name>Field3</name>
        <name>Field4</name>
        <name>Field5</name>
    </xsl:variable>

    <xsl:template match="/">
        <Events>
            <xsl:apply-templates select="rss/channel/item"/>
        </Events>
    </xsl:template>

    <!-- One feed item: emit the title, then call "extract" once per name. -->
    <xsl:template match="item">
        <!-- Captured here because inside the for-each below the context
             node is a <name> element, not the <item>. -->
        <xsl:variable name="description" select="description"/>
        <Event>
            <Category>
                <xsl:value-of select="title"/>
            </Category>
            <xsl:for-each select="ext:node-set($names)/name">
                <xsl:call-template name="extract">
                    <xsl:with-param name="text" select="$description"/>
                    <xsl:with-param name="field-name" select="."/>
                </xsl:call-template>
            </xsl:for-each>
        </Event>
    </xsl:template>

    <!-- Emits an element named $field-name holding that field's value.
         Params: text       - the escaped-HTML description text
                 field-name - label to look for, also the output element name
         Works in two passes:
           rough = text after the field name and before the next "div"
           value = the part of rough after the first '>' and before the
                   next '<', with surrounding whitespace trimmed
         If the field name does not occur, both passes yield '' and an empty
         element is written.
         NOTE(review): both searches are plain substring matches, so a value
         that itself contains "div", or a field name that is a prefix of
         another label, will not be extracted correctly. -->
    <xsl:template name="extract">
        <xsl:param name="text"/>
        <xsl:param name="field-name"/>
        <xsl:variable name="rough" select="substring-before(substring-after($text, $field-name), 'div')"/>
        <xsl:variable name="value" select="substring-before(substring-after($rough, '&gt;'), '&lt;')"/>
        <xsl:element name="{$field-name}">
            <xsl:value-of select="normalize-space($value)"/>
        </xsl:element>
    </xsl:template>

</xsl:stylesheet>

输出

<?xml version="1.0" encoding="utf-8"?>
<Events>
   <Event>
      <Category>Title</Category>
      <Field1>Value 1</Field1>
      <Field2>Value 2</Field2>
      <Field3>Value 3</Field3>
      <Field4>Value 4</Field4>
      <Field5>Value 5</Field5>
   </Event>
   <Event>
      <Category>Title</Category>
      <Field1>Value 1</Field1>
      <Field2/>
      <Field3>Value 3</Field3>
      <Field4>Value 4</Field4>
      <Field5>Value 5</Field5>
   </Event>
   <Event>
      <Category>Title</Category>
      <Field1>Value 1</Field1>
      <Field2>Value 2</Field2>
      <Field3>Value 3</Field3>
      <Field4>Value 4</Field4>
      <Field5>Value 5</Field5>
   </Event>
</Events>
于 2013-05-06T04:49:43.883 回答
0

这是一个基于 @Borodin 那个非常好的“extract”模板的递归解决方案。它有一个小优点:无需 node-set() 也能工作。

<?xml version="1.0"?>
<xsl:stylesheet
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  version="1.0">
    <!-- Converts every <item> into an <Event> holding a <Category> (the item
         title) followed by <Field1> .. <Field5>, generated by a counting
         recursion. Needs no extension functions, but assumes the fields are
         named "Field" plus a number. -->
    <xsl:output method="xml" indent="yes"/>
    <xsl:strip-space elements="*"/>

    <xsl:template match="/*">
        <Events>
            <xsl:apply-templates select="//item"/>
        </Events>
    </xsl:template>

    <!-- One feed item: emit the title, then fields 1 to 5. -->
    <xsl:template match="item">
        <Event>
            <Category>
                <xsl:value-of select="title"/>
            </Category>
            <!-- Counters are passed as numbers, not strings, so the
                 arithmetic and comparison in "Field" need no conversion. -->
            <xsl:call-template name="Field">
                <xsl:with-param name="fnr" select="1"/>
                <xsl:with-param name="max_fnr" select="5"/>
            </xsl:call-template>
        </Event>
    </xsl:template>

    <!-- Emits <Field{fnr}> .. <Field{max_fnr}> for the current <item>.
         Params: fnr     - number of the field to emit in this step
                 max_fnr - number of the last field to emit
         Must be called with an <item> as the context node. -->
    <xsl:template name="Field">
        <xsl:param name="fnr"/>
        <xsl:param name="max_fnr"/>

        <xsl:call-template name="extract">
            <!-- Search only the description; the string value of the whole
                 item would also include the title text. -->
            <xsl:with-param name="text" select="description"/>
            <xsl:with-param name="field-name" select="concat('Field', $fnr)"/>
        </xsl:call-template>

        <xsl:if test="$fnr &lt; $max_fnr">
            <xsl:call-template name="Field">
                <xsl:with-param name="fnr" select="$fnr + 1"/>
                <xsl:with-param name="max_fnr" select="$max_fnr"/>
            </xsl:call-template>
        </xsl:if>
    </xsl:template>

    <!-- Emits an element named $field-name holding that field's value.
         Params: text       - the escaped-HTML description text
                 field-name - label to look for, also the output element name
         The value is the text between "<b>field-name:</b>" and the next
         "</div>", with surrounding whitespace trimmed. If the label does not
         occur, the value is '' and an empty element is written. -->
    <xsl:template name="extract">
        <xsl:param name="text"/>
        <xsl:param name="field-name"/>
        <!-- Match the complete label markup so that "Field1" cannot match
             inside "Field10" or inside some other field's value. -->
        <xsl:variable name="label" select="concat('&lt;b&gt;', $field-name, ':&lt;/b&gt;')"/>
        <!-- Stop at the closing tag rather than at the bare substring "div",
             which would truncate values such as "Individual". -->
        <xsl:variable name="value" select="substring-before(substring-after($text, $label), '&lt;/div&gt;')"/>
        <xsl:element name="{$field-name}">
            <xsl:value-of select="normalize-space($value)"/>
        </xsl:element>
    </xsl:template>
</xsl:stylesheet>

这将生成以下输出:

<Events>
  <Event>
    <Category>Title</Category>
    <Field1>Value 1</Field1>
    <Field2>Value 2</Field2>
    <Field3>Value 3</Field3>
    <Field4>Value 4</Field4>
    <Field5>Value 5</Field5>
  </Event>
  <Event>
    <Category>Title</Category>
    <Field1>Value 1</Field1>
    <Field2/>
    <Field3>Value 3</Field3>
    <Field4>Value 4</Field4>
    <Field5>Value 5</Field5>
  </Event>
  <Event>
    <Category>Title</Category>
    <Field1>Value 1</Field1>
    <Field2>Value 2</Field2>
    <Field3>Value 3</Field3>
    <Field4>Value 4</Field4>
    <Field5>Value 5</Field5>
  </Event>
</Events>
于 2013-05-06T10:02:27.340 回答
0

这里还有一个带有若干“如果”前提的解决方案:
如果 description 的内容始终是“格式良好的 XML”(如您的示例中所示),并且
您可以执行两次单独的转换(两次 XSLT 处理器调用)。

第 1 步:使用 disable-output-escaping="yes" 生成一个临时 XML 文件,其中 description 的内容被还原为真正的标记(简单明了)。

<?xml version="1.0"?>
<xsl:stylesheet
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  version="1.0">
    <!-- Pass 1: copies the input unchanged except that the text of every
         <description> is written with output escaping disabled, so the
         escaped markup it carries is serialized as real markup. -->
    <xsl:output method="xml" indent="yes"/>
    <xsl:strip-space elements="*"/>

    <!-- <description>: keep the element and its attributes, but write its
         string value without escaping. -->
    <xsl:template match="description">
        <xsl:copy>
            <xsl:copy-of select="@*"/>
            <xsl:value-of select="string(.)" disable-output-escaping="yes"/>
        </xsl:copy>
    </xsl:template>

    <!-- Everything else: identity copy. -->
    <xsl:template match="node()|@*">
        <xsl:copy>
            <xsl:apply-templates select="node()|@*"/>
        </xsl:copy>
    </xsl:template>
</xsl:stylesheet>

第 2 步:从临时 XML 文件生成预期的输出(现在也很容易):

<?xml version="1.0"?>
<xsl:stylesheet
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  version="1.0">
    <!-- Pass 2: runs on a feed whose <description> elements already contain
         real <div><b>Name:</b> Value</div> markup. Converts every <item>
         into an <Event> holding a <Category> (the item title) followed by
         <Field1> .. <Field5>. -->
    <xsl:output method="xml" indent="yes"/>
    <xsl:strip-space elements="*"/>

    <xsl:template match="/*">
        <Events>
            <xsl:apply-templates select="//item"/>
        </Events>
    </xsl:template>

    <!-- One feed item: emit the title, then fields 1 to 5. -->
    <xsl:template match="item">
        <Event>
            <Category>
                <xsl:value-of select="title"/>
            </Category>
            <!-- Counters are passed as numbers, not strings. -->
            <xsl:call-template name="Field">
                <xsl:with-param name="fnr" select="1"/>
                <xsl:with-param name="max_fnr" select="5"/>
            </xsl:call-template>
        </Event>
    </xsl:template>

    <!-- Emits <Field{fnr}> .. <Field{max_fnr}> for the current <item>.
         Params: fnr     - number of the field to emit in this step
                 max_fnr - number of the last field to emit
         Must be called with an <item> as the context node. A field with no
         matching <div> yields an empty element. -->
    <xsl:template name="Field">
        <xsl:param name="fnr"/>
        <xsl:param name="max_fnr"/>
        <xsl:variable name="label" select="concat('Field', $fnr, ':')"/>
        <xsl:element name="Field{$fnr}">
            <!-- The text node after <b>..</b> starts with the space that
                 separates label and value (and may carry indentation added
                 when pass 1 was serialized), so trim it. -->
            <xsl:value-of select="normalize-space(description/div[b = $label]/text())"/>
        </xsl:element>
        <xsl:if test="$fnr &lt; $max_fnr">
            <xsl:call-template name="Field">
                <xsl:with-param name="fnr" select="$fnr + 1"/>
                <xsl:with-param name="max_fnr" select="$max_fnr"/>
            </xsl:call-template>
        </xsl:if>
    </xsl:template>
</xsl:stylesheet>
于 2013-05-06T10:16:06.283 回答