我有一个 PMML 格式的决策树模型,如下所示。如何把每个叶子节点对应的规则以文本或其他格式保存下来?
例如:uniformitycellsize <= 3.5 ^ clumpthickness <= 6.5 ^ normalnucleoli <= 3.5 => B
<!-- Binary-split classification tree; the target field is Class_Categorical and node scores are "B" or "M". -->
<TreeModel modelName="DecisionTree"
           functionName="classification"
           splitCharacteristic="binarySplit"
           missingValueStrategy="lastPrediction"
           noTrueChildStrategy="returnNullPrediction">
  <!-- Fields referenced by the model; all are declared with invalidValueTreatment="asIs". -->
  <MiningSchema>
    <MiningField name="clumpthickness" invalidValueTreatment="asIs"/>
    <MiningField name="uniformitycellsize" invalidValueTreatment="asIs"/>
    <MiningField name="uniformitycellshape" invalidValueTreatment="asIs"/>
    <MiningField name="marginaladhesion" invalidValueTreatment="asIs"/>
    <MiningField name="epithelialcellsize" invalidValueTreatment="asIs"/>
    <MiningField name="barenuclei" invalidValueTreatment="asIs"/>
    <MiningField name="blandchromatin" invalidValueTreatment="asIs"/>
    <MiningField name="normalnucleoli" invalidValueTreatment="asIs"/>
    <MiningField name="mitoses" invalidValueTreatment="asIs"/>
    <MiningField name="partition" invalidValueTreatment="asIs"/>
    <MiningField name="Class_Categorical" invalidValueTreatment="asIs" usageType="target"/>
  </MiningSchema>
  <!-- Root node: its predicate is always true, so every record enters here. -->
  <Node id="0" score="B" recordCount="559.0">
    <True/>
    <ScoreDistribution value="B" recordCount="365.0"/>
    <ScoreDistribution value="M" recordCount="194.0"/>
    <!-- Left subtree: uniformitycellsize <= 3.5 -->
    <Node id="1" score="B" recordCount="384.0">
      <SimplePredicate field="uniformitycellsize" operator="lessOrEqual" value="3.5"/>
      <ScoreDistribution value="B" recordCount="356.0"/>
      <ScoreDistribution value="M" recordCount="28.0"/>
      <Node id="2" score="B" recordCount="368.0">
        <SimplePredicate field="clumpthickness" operator="lessOrEqual" value="6.5"/>
        <ScoreDistribution value="B" recordCount="354.0"/>
        <ScoreDistribution value="M" recordCount="14.0"/>
        <!-- Leaf: uniformitycellsize <= 3.5 ^ clumpthickness <= 6.5 ^ normalnucleoli <= 3.5 => B -->
        <Node id="3" score="B" recordCount="353.0">
          <SimplePredicate field="normalnucleoli" operator="lessOrEqual" value="3.5"/>
          <ScoreDistribution value="B" recordCount="347.0"/>
          <ScoreDistribution value="M" recordCount="6.0"/>
        </Node>
        <!-- Leaf: uniformitycellsize <= 3.5 ^ clumpthickness <= 6.5 ^ normalnucleoli > 3.5 => M -->
        <Node id="10" score="M" recordCount="15.0">
          <SimplePredicate field="normalnucleoli" operator="greaterThan" value="3.5"/>
          <ScoreDistribution value="B" recordCount="7.0"/>
          <ScoreDistribution value="M" recordCount="8.0"/>
        </Node>
      </Node>
      <!-- Leaf: uniformitycellsize <= 3.5 ^ clumpthickness > 6.5 => M -->
      <Node id="11" score="M" recordCount="16.0">
        <SimplePredicate field="clumpthickness" operator="greaterThan" value="6.5"/>
        <ScoreDistribution value="B" recordCount="2.0"/>
        <ScoreDistribution value="M" recordCount="14.0"/>
      </Node>
    </Node>
    <!-- Right subtree: uniformitycellsize > 3.5 -->
    <Node id="12" score="M" recordCount="175.0">
      <SimplePredicate field="uniformitycellsize" operator="greaterThan" value="3.5"/>
      <ScoreDistribution value="B" recordCount="9.0"/>
      <ScoreDistribution value="M" recordCount="166.0"/>
      <Node id="13" score="M" recordCount="33.0">
        <SimplePredicate field="uniformitycellsize" operator="lessOrEqual" value="4.5"/>
        <ScoreDistribution value="B" recordCount="7.0"/>
        <ScoreDistribution value="M" recordCount="26.0"/>
        <Node id="14" score="M" recordCount="21.0">
          <SimplePredicate field="marginaladhesion" operator="lessOrEqual" value="5.5"/>
          <ScoreDistribution value="B" recordCount="7.0"/>
          <ScoreDistribution value="M" recordCount="14.0"/>
          <!-- Leaf: uniformitycellsize > 3.5 ^ uniformitycellsize <= 4.5 ^ marginaladhesion <= 5.5 ^ clumpthickness <= 7.5 => B -->
          <Node id="15" score="B" recordCount="10.0">
            <SimplePredicate field="clumpthickness" operator="lessOrEqual" value="7.5"/>
            <ScoreDistribution value="B" recordCount="6.0"/>
            <ScoreDistribution value="M" recordCount="4.0"/>
          </Node>
          <!-- Leaf: uniformitycellsize > 3.5 ^ uniformitycellsize <= 4.5 ^ marginaladhesion <= 5.5 ^ clumpthickness > 7.5 => M -->
          <Node id="16" score="M" recordCount="11.0">
            <SimplePredicate field="clumpthickness" operator="greaterThan" value="7.5"/>
            <ScoreDistribution value="B" recordCount="1.0"/>
            <ScoreDistribution value="M" recordCount="10.0"/>
          </Node>
        </Node>
        <!-- Leaf: uniformitycellsize > 3.5 ^ uniformitycellsize <= 4.5 ^ marginaladhesion > 5.5 => M -->
        <Node id="17" score="M" recordCount="12.0">
          <SimplePredicate field="marginaladhesion" operator="greaterThan" value="5.5"/>
          <ScoreDistribution value="B" recordCount="0.0"/>
          <ScoreDistribution value="M" recordCount="12.0"/>
        </Node>
      </Node>
      <!-- Leaf: uniformitycellsize > 3.5 ^ uniformitycellsize > 4.5 => M -->
      <Node id="18" score="M" recordCount="142.0">
        <SimplePredicate field="uniformitycellsize" operator="greaterThan" value="4.5"/>
        <ScoreDistribution value="B" recordCount="2.0"/>
        <ScoreDistribution value="M" recordCount="140.0"/>
      </Node>
    </Node>
  </Node>
</TreeModel>
==================================================== ========================= 实现这种结果的 xsl 样式表如下所示。
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <!--
    Prints one plain-text rule per leaf of a decision tree, one rule per line:

        field op value ^ field op value ^ ... => score

    A leaf is any Node element that has no child Node. Its rule is built from
    the SimplePredicate of every Node on the path from the root down to the
    leaf, in document order; a Node without a SimplePredicate (for example a
    root holding True) contributes no condition.

    NOTE(review): element names are matched in no namespace. If the input
    document declares a default PMML namespace, bind that namespace to a prefix
    here and qualify Node / SimplePredicate with it; confirm against the real
    input file.
  -->
  <xsl:output method="text" encoding="UTF-8"/>

  <xsl:template match="/">
    <xsl:for-each select="//Node[not(Node)]">
      <!-- Conditions collected along the root-to-leaf path. -->
      <xsl:for-each select="ancestor-or-self::Node/SimplePredicate">
        <xsl:value-of select="@field"/>
        <!-- Markup characters in the symbols are escaped so the stylesheet stays well-formed XML. -->
        <xsl:choose>
          <xsl:when test="@operator = 'lessOrEqual'"><xsl:text> &lt;= </xsl:text></xsl:when>
          <xsl:when test="@operator = 'greaterThan'"><xsl:text> &gt; </xsl:text></xsl:when>
          <xsl:when test="@operator = 'lessThan'"><xsl:text> &lt; </xsl:text></xsl:when>
          <xsl:when test="@operator = 'greaterOrEqual'"><xsl:text> &gt;= </xsl:text></xsl:when>
          <xsl:when test="@operator = 'equal'"><xsl:text> = </xsl:text></xsl:when>
          <xsl:when test="@operator = 'notEqual'"><xsl:text> != </xsl:text></xsl:when>
          <xsl:otherwise>
            <!-- Unmapped operator: print its name so field and value never run together. -->
            <xsl:text> </xsl:text>
            <xsl:value-of select="@operator"/>
            <xsl:text> </xsl:text>
          </xsl:otherwise>
        </xsl:choose>
        <xsl:value-of select="@value"/>
        <xsl:if test="position() != last()">
          <xsl:text> ^ </xsl:text>
        </xsl:if>
      </xsl:for-each>
      <!-- Back in the leaf's context: the score is read from the leaf itself,
           so it is correct even when the leaf carries no SimplePredicate. -->
      <xsl:text> =&gt; </xsl:text>
      <xsl:value-of select="@score"/>
      <!-- One rule per line. -->
      <xsl:text>&#10;</xsl:text>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
输出结果如下(注意:此示例输出中的字段名和阈值与上文给出的模型不一致,应是由另一棵决策树生成的;对上文的模型运行该样式表会得到 7 条规则):
Uniformity of Cell Size <= 2.5 ^ Bare Nuclei <= 5.5 => B
Uniformity of Cell Size <= 2.5 ^ Bare Nuclei > 5.5 => M
Uniformity of Cell Size > 2.5 ^ Uniformity of Cell Shape <= 2.5 ^ Clump Thickness <= 5.5 => B
Uniformity of Cell Size > 2.5 ^ Uniformity of Cell Shape <= 2.5 ^ Clump Thickness > 5.5 => M
Uniformity of Cell Size > 2.5 ^ Uniformity of Cell Shape > 2.5 => M