TransmogrifAI的官方文档没有这方面的说明,导致我在实现时费了一些功夫。在读了一部分TransmogrifAI的源码后,我找到了一些获取这些日志的方法。
可以参考TransmogrifAI源码中的获取方式,详情请查看modelInsights.selectedModelInfo方法的源码。
直接放代码:
获取模型最佳设置参数
// Summary of the selected model, if modelInsights exposes one.
val selectedModelInfo: Option[ModelSelectorSummary] = modelInsights.selectedModelInfo

// Parameter names that are filtered out of the reported model parameters.
// (The original listed ModelSelectorNames.outputParamName twice; one entry is enough in a Set.)
val excludedParams = Set(
  SparkWrapperParams.SparkStageParamName,
  ModelSelectorNames.outputParamName,
  ModelSelectorNames.inputParam1Name,
  ModelSelectorNames.inputParam2Name,
  OpPipelineStageParamsNames.InputFeatures,
  OpPipelineStageParamsNames.InputSchema,
  OpPipelineStageParamsNames.OutputMetadata,
  "labelCol", "predictionCol", "predictionValueCol", "rawPredictionCol", "probabilityCol"
)

// Table title; empty when there is no selected model.
val name = selectedModelInfo.map(sm => s"Selected Model - ${sm.bestModelType}").getOrElse("")

// (key, value) pairs describing the best model: its name, uid and type plus every
// non-excluded model parameter, sorted by key. Empty when there is no selected model
// or no validation result matches the best model's uid, name and type.
val validationResults = (for {
  sm <- selectedModelInfo.toSeq
  e <- sm.validationResults.filter(v =>
    v.modelUID == sm.bestModelUID && v.modelName == sm.bestModelName && v.modelType == sm.bestModelType
  )
} yield {
  // Strict filter instead of the lazy (and, since Scala 2.13, deprecated) filterKeys.
  val params = e.modelParameters.filter { case (key, _) => !excludedParams.contains(key) }
  Seq("name" -> e.modelName, "uid" -> e.modelUID, "modelType" -> e.modelType) ++ params
}).flatten.sortBy(_._1)

// Human-readable rendering of the same rows. The original evaluated this expression and
// discarded its value; binding it to a name makes the rendered table actually usable.
val prettyTable =
  if (validationResults.nonEmpty) {
    val table = Table(name = name, columns = Seq("Model Param", "Value"), rows = validationResults)
    Seq(table.prettyString())
  } else Seq.empty

// Result of the snippet: the raw (param, value) pairs.
validationResults
获取最佳模型信息
// Summary of the selected model, if modelInsights exposes one.
val selectedModelInfo: Option[ModelSelectorSummary] = modelInsights.selectedModelInfo
// Type name of the best model; empty when there is no selected model.
val opModelName: String = selectedModelInfo.map(sm => s"${sm.bestModelType}").getOrElse("")
// Result of the snippet: the model type name without its leading "Op".
// The original dropped the first two characters unconditionally, which would mangle any
// name that does not carry that prefix; stripPrefix only removes it when it is present.
// NOTE(review): assumes the two dropped characters were meant to be the "Op" prefix — confirm.
opModelName.stripPrefix("Op")
获取模型特征
// Gather the derived features of every feature reported by modelInsights.
val modelFeatures: Seq[Insights] = modelInsights.features.flatMap(_.derivedFeatures)
// Pair each derived feature name with its largest absolute contribution
// (0.0 when the feature has no contributions at all).
val featureContributions: Seq[(String, Double)] = modelFeatures.map { derived =>
  val peak = derived.contribution.foldLeft(0.0) { (best, c) => math.max(best, math.abs(c)) }
  (derived.derivedFeatureName, peak)
}
// Order by contribution, largest first.
val sortedContributions: Seq[(String, Double)] =
  featureContributions.sortBy { case (_, score) => -score }
// Report at most 20 entries.
val topNum: Int = math.min(20, sortedContributions.size)
// Debug output, disabled:
// println(s"Top $topNum 特征贡献:")
// sortedContributions.take(topNum).foreach(featureInfo => println(s"${featureInfo._1}: ${featureInfo._2}"))
// Result of the snippet: the top-ranked (feature name, contribution) pairs.
sortedContributions.take(topNum)