一、简介
PMML(Predictive Model Markup Language,预测模型标记语言)以XML为载体描述数据挖掘模型,便于在不同平台之间共享。各种建模软件直接导出的PMML文件,通常是单个模型或结构已固定的集成模型。最近在业务中碰到一个组合模型的需求,深入研究后发现,PMML支持直接在PMML文件中组合已有的模型,适合业务中个性化的模型需求。
二、模型说明
以下面的模型(LR+决策树+线性组合)为例:先用逻辑回归(LR)得到概率Index,再按F、G的取值条件(相当于决策树的分支)从两个线性回归中选择一个,计算最终输出CombinedRate。
三、代码
复合模型PMML代码摘要如下:
<!-- Composite PMML model (excerpt): a two-step model chain.
     Segment 1 is a logistic GLM that outputs the probability "Index";
     segment 2 feeds "Index" into one of two linear regressions, chosen by
     conditions on F and G, and outputs "CombinedRate". -->
<MiningModel functionName="regression">
  <MiningSchema>
    <MiningField name="A" usageType="active"/>
    <MiningField name="B" usageType="active"/>
    <MiningField name="C" usageType="active"/>
    <MiningField name="E" usageType="active"/>
    <MiningField name="CombinedRate" usageType="target"/>
    <MiningField name="D" usageType="active"/>
    <MiningField name="flag" usageType="target"/>
    <MiningField name="F" usageType="active"/>
    <MiningField name="G" usageType="active"/>
  </MiningSchema>
  <!-- modelChain: segments are evaluated in document order, and an OutputField
       of an earlier segment can be used as an input field by later segments. -->
  <Segmentation multipleModelMethod="modelChain">

    <!-- Step 1: logistic regression on A..E with target "flag". -->
    <Segment id="1">
      <True/>
      <GeneralRegressionModel modelType="generalizedLinear"
                              functionName="classification"
                              linkFunction="logit"
                              distribution="binomial">
        <MiningSchema>
          <MiningField name="flag" usageType="target"/>
          <MiningField name="A"/>
          <MiningField name="B"/>
          <MiningField name="C"/>
          <MiningField name="D"/>
          <MiningField name="E"/>
        </MiningSchema>
        <Output>
          <!-- "Index" is the predicted probability of flag = 1; step 2 reads it. -->
          <OutputField name="Index" optype="continuous" dataType="double"
                       feature="probability" value="1"/>
        </Output>
        <!-- ...... omitted in the original excerpt (presumably ParameterList,
             PPMatrix and related elements); they must be supplied before this
             model can be evaluated. -->
        <ParamMatrix>
          <!-- NOTE: beta="x0".."x5" look like placeholders for the fitted
               coefficients; replace them with numeric values. -->
          <PCell targetCategory="1" parameterName="p0" beta="x0"/>
          <PCell targetCategory="1" parameterName="p1" beta="x1"/>
          <PCell targetCategory="1" parameterName="p2" beta="x2"/>
          <PCell targetCategory="1" parameterName="p3" beta="x3"/>
          <PCell targetCategory="1" parameterName="p4" beta="x4"/>
          <PCell targetCategory="1" parameterName="p5" beta="x5"/>
        </ParamMatrix>
      </GeneralRegressionModel>
    </Segment>

    <!-- Step 2: pick a linear combination of Index and F based on F and G. -->
    <Segment id="2">
      <True/>
      <MiningModel modelName="Probability2" functionName="regression">
        <MiningSchema>
          <MiningField name="CombinedRate" usageType="target"/>
          <MiningField name="Index" usageType="active"/>
          <MiningField name="F" usageType="active"/>
          <MiningField name="G" usageType="active"/>
        </MiningSchema>
        <Output>
          <OutputField dataType="double" feature="predictedValue" name="CombinedRate"
                       optype="continuous"/>
        </Output>
        <!-- selectFirst: the first segment whose predicate is true supplies the result. -->
        <Segmentation multipleModelMethod="selectFirst">

          <!-- Branch 2.1: F >= 2 and G >= 10. -->
          <Segment id="2.1">
            <CompoundPredicate booleanOperator="and">
              <SimplePredicate field="F" operator="greaterOrEqual" value="2"/>
              <SimplePredicate field="G" operator="greaterOrEqual" value="10"/>
            </CompoundPredicate>
            <RegressionModel modelName="CombinedRate1" functionName="regression">
              <MiningSchema>
                <MiningField name="Index" usageType="active"/>
                <MiningField name="F" usageType="active"/>
                <MiningField name="CombinedRate" usageType="target"/>
              </MiningSchema>
              <Output>
                <OutputField dataType="double" feature="predictedValue" name="CombinedRate" optype="continuous"/>
              </Output>
              <!-- Both coefficients are 0, so the result is the constant intercept 1.
                   No targetCategory: that attribute only distinguishes the tables of a
                   classification model; this model predicts a numeric field. -->
              <RegressionTable intercept="1">
                <NumericPredictor coefficient="0" exponent="1" name="Index"/>
                <NumericPredictor coefficient="0" exponent="1" name="F"/>
              </RegressionTable>
            </RegressionModel>
          </Segment>

          <!-- Branch 2.2: F < 2 or G < 10 (the logical complement of branch 2.1). -->
          <Segment id="2.2">
            <CompoundPredicate booleanOperator="or">
              <SimplePredicate field="F" operator="lessThan" value="2"/>
              <SimplePredicate field="G" operator="lessThan" value="10"/>
            </CompoundPredicate>
            <RegressionModel modelName="CombinedRate2" functionName="regression">
              <MiningSchema>
                <MiningField name="Index" usageType="active"/>
                <MiningField name="F" usageType="active"/>
                <MiningField name="CombinedRate" usageType="target"/>
              </MiningSchema>
              <Output>
                <OutputField dataType="double" feature="predictedValue" name="CombinedRate" optype="continuous"/>
              </Output>
              <!-- CombinedRate = 0.5 * Index + 0.5 * F.
                   No targetCategory: the target is numeric, so a single table suffices. -->
              <RegressionTable intercept="0">
                <NumericPredictor coefficient="0.5" exponent="1" name="Index"/>
                <NumericPredictor coefficient="0.5" exponent="1" name="F"/>
              </RegressionTable>
            </RegressionModel>
          </Segment>

        </Segmentation>
      </MiningModel>
    </Segment>

  </Segmentation>
</MiningModel>
四、参考资料
版权声明: 本文由李兴创作和发表,采用署名(BY)-非商业性使用(NC)-禁止演绎(ND)4.0国际许可协议进行许可,转载请注明作者及出处,本文作者为李兴,本文标题为复合PMML——直接在PMML文件中组合模型的方法,本文链接为 https://blog.csdn.net/simonlishx2017/article/details/84778739 。商业用途请联系作者本人获得授权。
License
This article is licensed under a Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International License. Please contact me for other uses.