1

我正在尝试在 PDI 中对文件内容使用正则表达式,但输出中得到的却是空值(null)。该正则表达式在"Regex Evaluation"步骤的"测试正则表达式"功能中运行完全正常,但在预览中却没有显示相同的输出。

这是文件内容:

我期望输出为 1:19:18.637s,但实际得到的是 null。

下面是示例代码。它无法在您的本地机器上直接运行,但足以说明我想要实现的目标。这是我正在尝试的代码:

<?xml version="1.0" encoding="UTF-8"?>
<transformation-steps>
<steps>
  <!-- Row generator step: no fields are defined and the row limit is 1. -->
  <step>
    <name>Generate Rows</name>
    <type>RowGenerator</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
    <partitioning>
      <method>none</method>
      <schema_name/>
    </partitioning>
    <fields>
    </fields>
    <limit>1</limit>
    <never_ending>N</never_ending>
    <interval_in_ms>5000</interval_in_ms>
    <row_time_field>now</row_time_field>
    <last_time_field>FiveSecondsAgo</last_time_field>
    <cluster_schema/>
    <remotesteps>
      <input>   </input>
      <output>   </output>
    </remotesteps>
    <GUI>
      <xloc>318</xloc>
      <yloc>286</yloc>
      <draw>Y</draw>
    </GUI>
  </step>

  <!-- File listing step: looks in ${DEVCI_DATA_HOME}/console_output/ with the
       file mask ".*txt", all file types, no sub-folders, limit 10. -->
  <step>
    <name>Get File Names</name>
    <type>GetFileNames</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
    <partitioning>
      <method>none</method>
      <schema_name/>
    </partitioning>
    <filter>
      <filterfiletype>all_files</filterfiletype>
    </filter>
    <doNotFailIfNoFile>N</doNotFailIfNoFile>
    <rownum>N</rownum>
    <isaddresult>Y</isaddresult>
    <filefield>N</filefield>
    <rownum_field/>
    <filename_Field/>
    <wildcard_Field/>
    <exclude_wildcard_Field/>
    <dynamic_include_subfolders>N</dynamic_include_subfolders>
    <limit>10</limit>
    <file>
      <name>&#x24;&#x7b;DEVCI_DATA_HOME&#x7d;&#x2f;console_output&#x2f;</name>
      <filemask>.&#x2a;txt</filemask>
      <exclude_filemask/>
      <file_required>N</file_required>
      <include_subfolders>N</include_subfolders>
    </file>
    <cluster_schema/>
    <remotesteps>
      <input>   </input>
      <output>   </output>
    </remotesteps>
    <GUI>
      <xloc>438</xloc>
      <yloc>286</yloc>
      <draw>Y</draw>
    </GUI>
  </step>

  <!-- Variable step: adds one String field, issue_key_regex, whose value is
       the expression ${issue_key_regex}. -->
  <step>
    <name>Get Variables</name>
    <type>GetVariable</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
    <partitioning>
      <method>none</method>
      <schema_name/>
    </partitioning>
    <fields>
      <field>
        <name>issue_key_regex</name>
        <variable>&#x24;&#x7b;issue_key_regex&#x7d;</variable>
        <type>String</type>
        <format/>
        <currency/>
        <decimal/>
        <group/>
        <length>-1</length>
        <precision>-1</precision>
        <trim_type>none</trim_type>
      </field>
    </fields>
    <cluster_schema/>
    <remotesteps>
      <input>   </input>
      <output>   </output>
    </remotesteps>
    <GUI>
      <xloc>438</xloc>
      <yloc>126</yloc>
      <draw>Y</draw>
    </GUI>
  </step>

  <!-- File loading step: exposes the file content as the String field
       "File content". IsInFields is Y and the dynamic file name field is
       "filename"; a static file entry is also listed under <file>. -->
  <step>
    <name>Load file content in memory</name>
    <type>LoadFileInput</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
    <partitioning>
      <method>none</method>
      <schema_name/>
    </partitioning>
    <include>N</include>
    <include_field>full_file_path</include_field>
    <rownum>N</rownum>
    <addresultfile>N</addresultfile>
    <IsIgnoreEmptyFile>Y</IsIgnoreEmptyFile>
    <rownum_field/>
    <encoding/>
    <file>
      <name>C&#x3a;&#x5c;Users&#x5c;nikhil.karkare&#x5c;console_output&#x5c;star-lin64-build-feature_VMESH120_29.txt</name>
      <filemask/>
      <exclude_filemask/>
      <file_required>N</file_required>
      <include_subfolders>N</include_subfolders>
    </file>
    <fields>
      <field>
        <name>File content</name>
        <element_type>content</element_type>
        <type>String</type>
        <format/>
        <currency/>
        <decimal/>
        <group/>
        <length>-1</length>
        <precision>-1</precision>
        <trim_type>none</trim_type>
        <repeat>N</repeat>
      </field>
    </fields>
    <limit>0</limit>
    <IsInFields>Y</IsInFields>
    <DynamicFilenameField>filename</DynamicFilenameField>
    <shortFileFieldName>file_name</shortFileFieldName>
    <pathFieldName/>
    <hiddenFieldName/>
    <lastModificationTimeFieldName/>
    <uriNameFieldName/>
    <rootUriNameFieldName/>
    <extensionFieldName/>
    <cluster_schema/>
    <remotesteps>
      <input>   </input>
      <output>   </output>
    </remotesteps>
    <GUI>
      <xloc>938</xloc>
      <yloc>286</yloc>
      <draw>Y</draw>
    </GUI>
  </step>

  <!-- Regex step on the "uri" field: capture group 1 (the file name without
       its .txt extension, directly under console_output/) is written to
       build_id_from_regex.
       The dot before "txt" is escaped so that only a literal ".txt" suffix
       is accepted; unescaped it matched any character in that position. -->
  <step>
    <name>Regex Evaluation 3</name>
    <type>RegexEval</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
    <partitioning>
      <method>none</method>
      <schema_name/>
    </partitioning>
    <script><![CDATA[.*console_output\/([A-Za-z0-9_\.\-]+)\.txt]]></script>
    <matcher>uri</matcher>
    <resultfieldname/>
    <usevar>N</usevar>
    <allowcapturegroups>Y</allowcapturegroups>
    <replacefields>Y</replacefields>
    <canoneq>N</canoneq>
    <caseinsensitive>N</caseinsensitive>
    <comment>N</comment>
    <dotall>N</dotall>
    <multiline>N</multiline>
    <unicode>N</unicode>
    <unix>N</unix>
    <fields>
      <field>
        <name>build_id_from_regex</name>
        <type>String</type>
        <format/>
        <group/>
        <decimal/>
        <length>-1</length>
        <precision>-1</precision>
        <nullif/>
        <ifnull/>
        <trimtype>none</trimtype>
      </field>
    </fields>
    <cluster_schema/>
    <remotesteps>
      <input>   </input>
      <output>   </output>
    </remotesteps>
    <GUI>
      <xloc>1098</xloc>
      <yloc>286</yloc>
      <draw>Y</draw>
    </GUI>
  </step>

  <!-- Regex step on the "File content" field: capture group 1 is written to
       star_maven_time and the match flag to "result".

       Why the pattern and the dotall flag were changed:
       * The field holds a whole, multi-line file. With dotall=N the "." in
         the original pattern cannot cross a line terminator, so a pattern
         that has to cover the entire value never matches and the capture
         field stays null. A single-line sample in the step's test dialog
         still matches, which hides the problem.
         NOTE(review): this assumes the step requires the pattern to match
         the complete field value (Java Matcher.matches semantics); confirm
         against the PDI version in use.
       * dotall=Y lets the leading and trailing ".*" span the other lines.
       * Between [INFO] and the closing bracket the pattern uses [^\r\n]* so
         the match stays on one line, and the capture group excludes "]" so
         it stops at the first closing bracket instead of running to the last
         bracket in the file.
       For a line such as
         [INFO] star-maven ... SUCCESS [1:19:18.637s]
       the captured value is 1:19:18.637s. If several lines qualify, the
       greedy leading ".*" selects the last one. -->
  <step>
    <name>Regex Evaluation 4</name>
    <type>RegexEval</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
    <partitioning>
      <method>none</method>
      <schema_name/>
    </partitioning>
    <script><![CDATA[.*\[INFO\][^\r\n]*star\-maven[^\r\n]*SUCCESS[^\r\n]*\[([^\]\r\n]*)\].*]]></script>
    <matcher>File content</matcher>
    <resultfieldname>result</resultfieldname>
    <usevar>N</usevar>
    <allowcapturegroups>Y</allowcapturegroups>
    <replacefields>Y</replacefields>
    <canoneq>N</canoneq>
    <caseinsensitive>N</caseinsensitive>
    <comment>N</comment>
    <dotall>Y</dotall>
    <multiline>N</multiline>
    <unicode>N</unicode>
    <unix>N</unix>
    <fields>
      <field>
        <name>star_maven_time</name>
        <type>String</type>
        <format/>
        <group/>
        <decimal/>
        <length>-1</length>
        <precision>-1</precision>
        <nullif/>
        <ifnull/>
        <trimtype>none</trimtype>
      </field>
    </fields>
    <cluster_schema/>
    <remotesteps>
      <input>   </input>
      <output>   </output>
    </remotesteps>
    <GUI>
      <xloc>725</xloc>
      <yloc>124</yloc>
      <draw>Y</draw>
    </GUI>
  </step>

  <!-- Regex step on the "short_filename" field: capture group 1 (the name
       without its .txt extension) is written to build_id_from_short_filename.
       The dot before "txt" is escaped so that only a literal ".txt" suffix
       is accepted; unescaped it matched any character in that position. -->
  <step>
    <name>Regex Evaluation 6</name>
    <type>RegexEval</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
    <partitioning>
      <method>none</method>
      <schema_name/>
    </partitioning>
    <script><![CDATA[([A-Za-z0-9_\.\-]+)\.txt]]></script>
    <matcher>short_filename</matcher>
    <resultfieldname/>
    <usevar>N</usevar>
    <allowcapturegroups>Y</allowcapturegroups>
    <replacefields>Y</replacefields>
    <canoneq>N</canoneq>
    <caseinsensitive>N</caseinsensitive>
    <comment>N</comment>
    <dotall>N</dotall>
    <multiline>N</multiline>
    <unicode>N</unicode>
    <unix>N</unix>
    <fields>
      <field>
        <name>build_id_from_short_filename</name>
        <type>String</type>
        <format/>
        <group/>
        <decimal/>
        <length>-1</length>
        <precision>-1</precision>
        <nullif/>
        <ifnull/>
        <trimtype>none</trimtype>
      </field>
    </fields>
    <cluster_schema/>
    <remotesteps>
      <input>   </input>
      <output>   </output>
    </remotesteps>
    <GUI>
      <xloc>558</xloc>
      <yloc>286</yloc>
      <draw>Y</draw>
    </GUI>
  </step>

  <!-- Stream lookup step: the lookup source is "Regex Evaluation 3". The
       key build_id is compared with build_id_from_regex, and the values
       build_id_from_regex and "File content" are retrieved as Strings. -->
  <step>
    <name>Stream lookup 4</name>
    <type>StreamLookup</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
    <partitioning>
      <method>none</method>
      <schema_name/>
    </partitioning>
    <from>Regex Evaluation 3</from>
    <input_sorted>N</input_sorted>
    <preserve_memory>Y</preserve_memory>
    <sorted_list>N</sorted_list>
    <integer_pair>N</integer_pair>
    <lookup>
      <key>
        <name>build_id</name>
        <field>build_id_from_regex</field>
      </key>
      <value>
        <name>build_id_from_regex</name>
        <rename>build_id_from_regex</rename>
        <default/>
        <type>String</type>
      </value>
      <value>
        <name>File content</name>
        <rename>File content</rename>
        <default/>
        <type>String</type>
      </value>
    </lookup>
    <cluster_schema/>
    <remotesteps>
      <input>   </input>
      <output>   </output>
    </remotesteps>
    <GUI>
      <xloc>578</xloc>
      <yloc>126</yloc>
      <draw>Y</draw>
    </GUI>
  </step>

  <!-- Dummy step: carries no settings of its own; it is the target of the
       hop from "Regex Evaluation 4". -->
  <step>
    <name>Dummy &#x28;do nothing&#x29;</name>
    <type>Dummy</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
    <partitioning>
      <method>none</method>
      <schema_name/>
    </partitioning>
    <cluster_schema/>
    <remotesteps>
      <input>   </input>
      <output>   </output>
    </remotesteps>
    <GUI>
      <xloc>1036</xloc>
      <yloc>120</yloc>
      <draw>Y</draw>
    </GUI>
  </step>

  <!-- Data grid step: declares a single String field, build_id, and one data
       line whose only item is empty. -->
  <step>
    <name>Data Grid</name>
    <type>DataGrid</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
    <partitioning>
      <method>none</method>
      <schema_name/>
    </partitioning>
    <fields>
      <field>
        <name>build_id</name>
        <type>String</type>
        <format/>
        <currency/>
        <decimal/>
        <group/>
        <length>-1</length>
        <precision>-1</precision>
        <set_empty_string>N</set_empty_string>
      </field>
    </fields>
    <data>
      <line> <item/> </line>
    </data>
    <cluster_schema/>
    <remotesteps>
      <input>   </input>
      <output>   </output>
    </remotesteps>
    <GUI>
      <xloc>308</xloc>
      <yloc>126</yloc>
      <draw>Y</draw>
    </GUI>
  </step>

</steps>
<!-- Hop list. Two chains are wired here:
       Generate Rows, Get File Names, Regex Evaluation 6,
       Load file content in memory, Regex Evaluation 3, Stream lookup 4
     and
       Data Grid, Get Variables, Stream lookup 4, Regex Evaluation 4,
       Dummy (do nothing).
     All hops are enabled. -->
<order>
  <hop>
    <from>Generate Rows</from>
    <to>Get File Names</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Get File Names</from>
    <to>Regex Evaluation 6</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Get Variables</from>
    <to>Stream lookup 4</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Load file content in memory</from>
    <to>Regex Evaluation 3</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Regex Evaluation 3</from>
    <to>Stream lookup 4</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Regex Evaluation 4</from>
    <to>Dummy &#x28;do nothing&#x29;</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Regex Evaluation 6</from>
    <to>Load file content in memory</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Stream lookup 4</from>
    <to>Regex Evaluation 4</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Data Grid</from>
    <to>Get Variables</to>
    <enabled>Y</enabled>
  </hop>
</order>
<notepads>
</notepads>
<step_error_handling>
</step_error_handling>
</transformation-steps>

任何建议将不胜感激。谢谢

4

1 回答 1

0

问题已经解决了。我使用的正则表达式需要针对我想要提取的字符串做进一步的改进。

结论:如果文件内容的大小以兆字节计,而您想从中提取某个字符串,那么您的正则表达式应当非常精确地针对该字符串(顺便说一句,正则表达式本来就应该始终足够精确)。在正则表达式编译器或"Regex Evaluation"步骤的测试工具中进行测试时,它可能会给出正确的结果,但在实际运行转换时,您看到的却只有空值。请重新修改正则表达式并不断优化,直到在输出中看到您想要提取的字符串为止。

于 2016-09-29T20:46:11.850 回答