1、LightGBM
多分类
import os
import lightgbm as lgb
from sklearn import datasets
from sklearn.model_selection import train_test_split
# Load the Iris data set and hold out 20% of the rows for validation.
iris = datasets.load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=56)

# Create the LightGBM datasets.
# Feature names are attached to the training Dataset instead of being passed
# to lgb.train(): the Dataset argument is accepted by every LightGBM release,
# whereas the train() argument is deprecated in recent ones. The validation
# set is tied to the training set through `reference`.
lgb_train = lgb.Dataset(X_train, y_train, feature_name=list(iris.feature_names))
lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)

# Training configuration: 3-class softmax objective with small trees.
params = {
    'objective': 'multiclass',
    'num_class': 3,
    'num_leaves': 5,
    # A list (not a set) keeps the metric order deterministic between runs.
    'metric': ['multi_logloss', 'multi_error'],
    'verbose': 0,
}

evals_result = {}  # filled by the record_evaluation callback, e.g. for plotting
print('Starting training...')

# Train for 5 boosting rounds, evaluating on both the training and the
# validation set. The `evals_result=` / `verbose_eval=` keywords of
# lgb.train() were removed in LightGBM 4.0, so the equivalent callbacks are
# used instead (log_evaluation requires LightGBM >= 3.3).
gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round=5,
    valid_sets=[lgb_train, lgb_test],
    callbacks=[
        lgb.record_evaluation(evals_result),
        lgb.log_evaluation(period=1),
    ],
)

# Save the model in LightGBM's text format.
gbm.save_model('lgb_model.txt')
2、转为PMML文件
GitHub:JPMML-LightGBM
或CSDN直接下载jpmml-lightgbm-executable-1.3.7.jar
java -jar "path\to\jpmml-lightgbm-executable-1.3.7.jar" --lgbm-input lgb_model.txt --pmml-output lgb_model.pmml
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<PMML xmlns="http://www.dmg.org/PMML-4_4" xmlns:data="http://jpmml.org/jpmml-model/InlineTable" version="4.4">
<Header>
<Application name="JPMML-LightGBM" version="1.3.7" />
<Timestamp>2021-06-01T11:54:38Z</Timestamp>
</Header>
<DataDictionary>
<DataField name="_target" optype="categorical" dataType="integer">
<Value value="0" />
<Value value="1" />
<Value value="2" />
</DataField>
<DataField name="sepal_length_(cm)" optype="continuous" dataType="double">
<Interval closure="closedClosed" leftMargin="4.3" rightMargin="7.9" />
<Value value="NaN" property="missing" />
</DataField>
<DataField name="sepal_width_(cm)" optype="continuous" dataType="double">
<Interval closure="closedClosed" leftMargin="2.0" rightMargin="4.4" />
<Value value="NaN" property="missing" />
</DataField>
<DataField name="petal_length_(cm)" optype="continuous" dataType="double">
<Interval closure="closedClosed" leftMargin="1.0" rightMargin="6.9" />
<Value value="NaN" property="missing" />
</DataField>
<DataField name="petal_width_(cm)" optype="continuous" dataType="double">
<Interval closure="closedClosed" leftMargin="0.1" rightMargin="2.5" />
<Value value="NaN" property="missing" />
</DataField>
</DataDictionary>
<MiningModel functionName="classification" algorithmName="LightGBM">
<MiningSchema>
<MiningField name="_target" usageType="target" />
<MiningField name="sepal_length_(cm)" importance="11.0" invalidValueTreatment="asIs" />
<MiningField name="sepal_width_(cm)" importance="2.0" invalidValueTreatment="asIs" />
<MiningField name="petal_length_(cm)" importance="24.0" invalidValueTreatment="asIs" />
<MiningField name="petal_width_(cm)" importance="13.0" invalidValueTreatment="asIs" />
</MiningSchema>
<Segmentation multipleModelMethod="modelChain" missingPredictionTreatment="returnMissing">
<Segment id="1">
<True />
<MiningModel functionName="regression">
<MiningSchema>
<MiningField name="sepal_length_(cm)" />
<MiningField name="petal_length_(cm)" />
</MiningSchema>
<Output>
<OutputField name="lgbmValue(0)" optype="continuous" dataType="double" isFinalResult="false" />
</Output>
<Segmentation multipleModelMethod="sum">
<Segment id="1">
<True />
<TreeModel functionName="regression" noTrueChildStrategy="returnLastPrediction">
<MiningSchema>
<MiningField name="sepal_length_(cm)" />
<MiningField name="petal_length_(cm)" />
</MiningSchema>
<Node score="-0.9593265712691172">
<True />
<Node score="-1.1736122903444903">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="3.4000000000000004" />
</Node>
<Node score="-0.9486122853153485">
<SimplePredicate field="sepal_length_(cm)" operator="greaterThan" value="5.050000000000001" />
</Node>
</Node>
</TreeModel>
</Segment>
<Segment id="2">
<True />
<TreeModel functionName="regression" noTrueChildStrategy="returnLastPrediction">
<MiningSchema>
<MiningField name="sepal_length_(cm)" />
<MiningField name="petal_length_(cm)" />
</MiningSchema>
<Node score="0.12185392449415344">
<True />
<Node score="-0.07257055906750128">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="3.4000000000000004" />
<Node score="-0.07207145404943262">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="4.750000000000001" />
<Node score="-0.07219570607417626">
<SimplePredicate field="sepal_length_(cm)" operator="greaterThan" value="6.450000000000001" />
</Node>
</Node>
</Node>
<Node score="0.1303003664697049">
<SimplePredicate field="sepal_length_(cm)" operator="greaterThan" value="5.050000000000001" />
</Node>
</Node>
</TreeModel>
</Segment>
<Segment id="3">
<True />
<TreeModel functionName="regression" noTrueChildStrategy="returnLastPrediction">
<MiningSchema>
<MiningField name="sepal_length_(cm)" />
<MiningField name="petal_length_(cm)" />
</MiningSchema>
<Node score="0.10845272227015224">
<True />
<Node score="-0.07021340444377018">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="3.4000000000000004" />
<Node score="-0.06973716059085941">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="4.750000000000001" />
<Node score="-0.0697703896715927">
<SimplePredicate field="sepal_length_(cm)" operator="greaterThan" value="6.450000000000001" />
</Node>
</Node>
</Node>
<Node score="0.11590997871777123">
<SimplePredicate field="sepal_length_(cm)" operator="greaterThan" value="5.050000000000001" />
</Node>
</Node>
</TreeModel>
</Segment>
<Segment id="4">
<True />
<TreeModel functionName="regression" noTrueChildStrategy="returnLastPrediction">
<MiningSchema>
<MiningField name="sepal_length_(cm)" />
<MiningField name="petal_length_(cm)" />
</MiningSchema>
<Node score="0.09842698080005607">
<True />
<Node score="-0.06827343512337868">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="3.4000000000000004" />
<Node score="-0.06761783054533778">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="4.750000000000001" />
<Node score="-0.06764010026474891">
<SimplePredicate field="sepal_length_(cm)" operator="greaterThan" value="6.450000000000001" />
</Node>
</Node>
</Node>
<Node score="0.10504313154015506">
<SimplePredicate field="sepal_length_(cm)" operator="greaterThan" value="5.050000000000001" />
</Node>
</Node>
</TreeModel>
</Segment>
<Segment id="5">
<True />
<TreeModel functionName="regression" noTrueChildStrategy="returnLastPrediction">
<MiningSchema>
<MiningField name="sepal_length_(cm)" />
<MiningField name="petal_length_(cm)" />
</MiningSchema>
<Node score="0.0904309166215982">
<True />
<Node score="-0.06652987665633432">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="3.4000000000000004" />
<Node score="-0.0658371655887295">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="4.750000000000001" />
<Node score="-0.06572093661554763">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="5.550000000000001" />
</Node>
</Node>
</Node>
<Node score="0.09656194725537585">
<SimplePredicate field="sepal_length_(cm)" operator="greaterThan" value="5.050000000000001" />
</Node>
</Node>
</TreeModel>
</Segment>
</Segmentation>
</MiningModel>
</Segment>
<Segment id="2">
<True />
<MiningModel functionName="regression">
<MiningSchema>
<MiningField name="petal_length_(cm)" />
<MiningField name="petal_width_(cm)" />
</MiningSchema>
<Output>
<OutputField name="lgbmValue(1)" optype="continuous" dataType="double" isFinalResult="false" />
</Output>
<Segmentation multipleModelMethod="sum">
<Segment id="1">
<True />
<TreeModel functionName="regression" noTrueChildStrategy="returnLastPrediction">
<MiningSchema>
<MiningField name="petal_length_(cm)" />
</MiningSchema>
<Node score="-1.1736122903444903">
<True />
<Node score="-0.9659199780098979">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="1.8" />
<Node score="-1.1736122903444903">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="4.950000000000001" />
</Node>
<Node score="-1.002183715084192">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="4.450000000000001" />
</Node>
</Node>
</Node>
</TreeModel>
</Segment>
<Segment id="2">
<True />
<TreeModel functionName="regression" noTrueChildStrategy="returnLastPrediction">
<MiningSchema>
<MiningField name="petal_width_(cm)" />
</MiningSchema>
<Node score="-0.07250517806119815">
<True />
<Node score="0.11645485855092522">
<SimplePredicate field="petal_width_(cm)" operator="greaterThan" value="0.45000000000000007" />
<Node score="-0.06700056833064691">
<SimplePredicate field="petal_width_(cm)" operator="greaterThan" value="1.6500000000000001" />
</Node>
<Node score="0.0981912222296533">
<SimplePredicate field="petal_width_(cm)" operator="greaterThan" value="1.3500000000000003" />
</Node>
</Node>
</Node>
</TreeModel>
</Segment>
<Segment id="3">
<True />
<TreeModel functionName="regression" noTrueChildStrategy="returnLastPrediction">
<MiningSchema>
<MiningField name="petal_length_(cm)" />
<MiningField name="petal_width_(cm)" />
</MiningSchema>
<Node score="-0.07006593404406375">
<True />
<Node score="0.1050172735135845">
<SimplePredicate field="petal_width_(cm)" operator="greaterThan" value="0.45000000000000007" />
<Node score="-0.07081316273464346">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="4.950000000000001" />
</Node>
<Node score="0.08054282244171226">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="4.450000000000001" />
</Node>
</Node>
</Node>
</TreeModel>
</Segment>
<Segment id="4">
<True />
<TreeModel functionName="regression" noTrueChildStrategy="returnLastPrediction">
<MiningSchema>
<MiningField name="petal_length_(cm)" />
</MiningSchema>
<Node score="-0.06811367792034477">
<True />
<Node score="0.09588720003106202">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="1.8" />
<Node score="-0.06868727088904823">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="4.950000000000001" />
</Node>
<Node score="0.07317294292378695">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="4.450000000000001" />
</Node>
</Node>
</Node>
</TreeModel>
</Segment>
<Segment id="5">
<True />
<TreeModel functionName="regression" noTrueChildStrategy="returnLastPrediction">
<MiningSchema>
<MiningField name="petal_width_(cm)" />
</MiningSchema>
<Node score="-0.06617439842593348">
<True />
<Node score="0.08703239160896648">
<SimplePredicate field="petal_width_(cm)" operator="greaterThan" value="0.45000000000000007" />
<Node score="-0.06143345433867317">
<SimplePredicate field="petal_width_(cm)" operator="greaterThan" value="1.6500000000000001" />
</Node>
<Node score="0.07675765159801226">
<SimplePredicate field="petal_width_(cm)" operator="greaterThan" value="1.3500000000000003" />
</Node>
</Node>
</Node>
</TreeModel>
</Segment>
</Segmentation>
</MiningModel>
</Segment>
<Segment id="3">
<True />
<MiningModel functionName="regression">
<MiningSchema>
<MiningField name="sepal_length_(cm)" />
<MiningField name="sepal_width_(cm)" />
<MiningField name="petal_length_(cm)" />
<MiningField name="petal_width_(cm)" />
</MiningSchema>
<Output>
<OutputField name="lgbmValue(2)" optype="continuous" dataType="double" isFinalResult="false" />
</Output>
<Segmentation multipleModelMethod="sum">
<Segment id="1">
<True />
<TreeModel functionName="regression" noTrueChildStrategy="returnLastPrediction">
<MiningSchema>
<MiningField name="sepal_length_(cm)" />
<MiningField name="petal_length_(cm)" />
</MiningSchema>
<Node score="-1.1638296814301798">
<True />
<Node score="-0.9877427209725906">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="4.750000000000001" />
<Node score="-0.9486122853153485">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="5.550000000000001" />
</Node>
</Node>
<Node score="-1.1736122903444903">
<SimplePredicate field="sepal_length_(cm)" operator="greaterThan" value="5.050000000000001" />
</Node>
</Node>
</TreeModel>
</Segment>
<Segment id="2">
<True />
<TreeModel functionName="regression" noTrueChildStrategy="returnLastPrediction">
<MiningSchema>
<MiningField name="sepal_width_(cm)" />
<MiningField name="petal_width_(cm)" />
</MiningSchema>
<Node score="-0.0724357261619468">
<True />
<Node score="0.12736121841495812">
<SimplePredicate field="petal_width_(cm)" operator="greaterThan" value="1.6500000000000001" />
</Node>
<Node score="-0.0319429343565856">
<SimplePredicate field="petal_width_(cm)" operator="greaterThan" value="1.3500000000000003" />
</Node>
<Node score="-0.07213929036587866">
<SimplePredicate field="sepal_width_(cm)" operator="greaterThan" value="3.35" />
</Node>
</Node>
</TreeModel>
</Segment>
<Segment id="3">
<True />
<TreeModel functionName="regression" noTrueChildStrategy="returnLastPrediction">
<MiningSchema>
<MiningField name="sepal_length_(cm)" />
<MiningField name="petal_length_(cm)" />
<MiningField name="petal_width_(cm)" />
</MiningSchema>
<Node score="-0.059662719835815003">
<True />
<Node score="0.08831532996239182">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="4.750000000000001" />
<Node score="0.11732538921269522">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="5.550000000000001" />
</Node>
</Node>
<Node score="-0.06975022798344958">
<SimplePredicate field="sepal_length_(cm)" operator="greaterThan" value="5.050000000000001" />
<Node score="-0.07092820238292231">
<SimplePredicate field="petal_width_(cm)" operator="greaterThan" value="1.2500000000000002" />
</Node>
</Node>
</Node>
</TreeModel>
</Segment>
<Segment id="4">
<True />
<TreeModel functionName="regression" noTrueChildStrategy="returnLastPrediction">
<MiningSchema>
<MiningField name="sepal_length_(cm)" />
<MiningField name="petal_length_(cm)" />
<MiningField name="petal_width_(cm)" />
</MiningSchema>
<Node score="-0.05718847818792119">
<True />
<Node score="0.08026262561477648">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="4.750000000000001" />
<Node score="0.10583777787647669">
<SimplePredicate field="petal_length_(cm)" operator="greaterThan" value="5.550000000000001" />
</Node>
</Node>
<Node score="-0.0676030312212233">
<SimplePredicate field="sepal_length_(cm)" operator="greaterThan" value="5.050000000000001" />
<Node score="-0.06893421344995508">
<SimplePredicate field="petal_width_(cm)" operator="greaterThan" value="1.2500000000000002" />
</Node>
</Node>
</Node>
</TreeModel>
</Segment>
<Segment id="5">
<True />
<TreeModel functionName="regression" noTrueChildStrategy="returnLastPrediction">
<MiningSchema>
<MiningField name="sepal_width_(cm)" />
<MiningField name="petal_width_(cm)" />
</MiningSchema>
<Node score="-0.0662759744408909">
<True />
<Node score="0.09578590305317655">
<SimplePredicate field="petal_width_(cm)" operator="greaterThan" value="1.6500000000000001" />
</Node>
<Node score="-0.029336428814000854">
<SimplePredicate field="petal_width_(cm)" operator="greaterThan" value="1.3500000000000003" />
</Node>
<Node score="-0.06563769706964893">
<SimplePredicate field="sepal_width_(cm)" operator="greaterThan" value="3.2500000000000004" />
</Node>
</Node>
</TreeModel>
</Segment>
</Segmentation>
</MiningModel>
</Segment>
<Segment id="4">
<True />
<RegressionModel functionName="classification" normalizationMethod="softmax">
<MiningSchema>
<MiningField name="_target" usageType="target" />
<MiningField name="lgbmValue(0)" />
<MiningField name="lgbmValue(1)" />
<MiningField name="lgbmValue(2)" />
</MiningSchema>
<Output>
<OutputField name="probability(0)" optype="continuous" dataType="double" feature="probability" value="0" />
<OutputField name="probability(1)" optype="continuous" dataType="double" feature="probability" value="1" />
<OutputField name="probability(2)" optype="continuous" dataType="double" feature="probability" value="2" />
</Output>
<RegressionTable intercept="0.0" targetCategory="0">
<NumericPredictor name="lgbmValue(0)" coefficient="1.0" />
</RegressionTable>
<RegressionTable intercept="0.0" targetCategory="1">
<NumericPredictor name="lgbmValue(1)" coefficient="1.0" />
</RegressionTable>
<RegressionTable intercept="0.0" targetCategory="2">
<NumericPredictor name="lgbmValue(2)" coefficient="1.0" />
</RegressionTable>
</RegressionModel>
</Segment>
</Segmentation>
</MiningModel>
</PMML>
3、Java调用解析PMML文件
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the Java example project (com.gld:apple, version 1.0-SNAPSHOT). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates. -->
<groupId>com.gld</groupId>
<artifactId>apple</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- The only declared dependency: JPMML-Evaluator 1.5.15. -->
<!-- NOTE(review): presumably this supplies the org.jpmml.evaluator and org.dmg.pmml classes imported by the Java example; confirm. -->
<dependency>
<groupId>org.jpmml</groupId>
<artifactId>pmml-evaluator</artifactId>
<version>1.5.15</version>
</dependency>
</dependencies>
</project>
import org.dmg.pmml.FieldName;
import org.jpmml.evaluator.*;
import org.xml.sax.SAXException;
import javax.xml.bind.JAXBException;
import java.io.File;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
 * Command-line example that loads a PMML model with JPMML-Evaluator, prints
 * its input, target and output fields, and scores one hard-coded Iris record.
 */
public class Server {

    /**
     * Model file used when no path is given on the command line. This is the
     * file name written by the PMML conversion step ({@code --pmml-output lgb_model.pmml}).
     */
    private static final String DEFAULT_MODEL_PATH = "lgb_model.pmml";

    /**
     * Loads the model, verifies it, prints its schema and evaluates one record.
     *
     * @param args optional; {@code args[0]} overrides the PMML file path
     * @throws JAXBException if the PMML document cannot be unmarshalled
     * @throws SAXException if the PMML document is not well-formed XML
     * @throws IOException if the PMML file cannot be read
     */
    public static void main(String[] args) throws JAXBException, SAXException, IOException {
        // Building a model evaluator from a PMML file
        String modelPath = (args.length > 0) ? args[0] : DEFAULT_MODEL_PATH;
        System.out.println(modelPath);
        Evaluator evaluator = new LoadingModelEvaluatorBuilder().load(new File(modelPath)).build();

        // Performing the self-check
        evaluator.verify();

        // Printing input (x1, x2, .., xn) fields
        List<? extends InputField> inputFields = evaluator.getInputFields();
        System.out.println("Input fields: ");
        for (InputField inputField : inputFields) {
            System.out.println(inputField);
        }

        // Printing primary result (y) field(s)
        List<? extends TargetField> targetFields = evaluator.getTargetFields();
        System.out.println("Target field(s): " + targetFields);

        // Printing secondary result (eg. probability(y), decision(y)) fields
        List<? extends OutputField> outputFields = evaluator.getOutputFields();
        System.out.println("Output fields: ");
        for (OutputField outputField : outputFields) {
            System.out.println(outputField);
        }

        // Predicting. Sample records and their expected classes:
        // 5.1, 3.5, 1.4, 0.2 -> 0
        // 6.4, 3.2, 4.5, 1.5 -> 1
        // 5.9, 3. , 5.1, 1.8 -> 2
        Map<String, Double> inputRecord = new LinkedHashMap<>();
        inputRecord.put("sepal_length_(cm)", 5.1);
        inputRecord.put("sepal_width_(cm)", 3.5);
        inputRecord.put("petal_length_(cm)", 1.4);
        inputRecord.put("petal_width_(cm)", 0.2);

        // Mapping the record field-by-field from data source schema to PMML schema
        Map<FieldName, FieldValue> arguments = new LinkedHashMap<>();
        for (InputField inputField : inputFields) {
            FieldName inputName = inputField.getName();
            // A field missing from the record yields null here and is handed
            // to prepare() as-is.
            Object rawValue = inputRecord.get(inputName.getValue());
            // Transforming an arbitrary user-supplied value to a known-good PMML value
            FieldValue inputValue = inputField.prepare(rawValue);
            arguments.put(inputName, inputValue);
        }

        // Evaluating the model with known-good arguments
        Map<FieldName, ?> results = evaluator.evaluate(arguments);
        System.out.println(results);

        // Decoupling results from the JPMML-Evaluator runtime environment
        Map<String, ?> resultRecord = EvaluatorUtil.decodeAll(results);
        System.out.println(resultRecord);
    }
}
Output:
Input fields:
InputField{name=sepal_length_(cm), fieldName=sepal_length_(cm), displayName=null, dataType=double, opType=continuous}
InputField{name=sepal_width_(cm), fieldName=sepal_width_(cm), displayName=null, dataType=double, opType=continuous}
InputField{name=petal_length_(cm), fieldName=petal_length_(cm), displayName=null, dataType=double, opType=continuous}
InputField{name=petal_width_(cm), fieldName=petal_width_(cm), displayName=null, dataType=double, opType=continuous}
Target field(s): [TargetField{name=_target, fieldName=_target, displayName=null, dataType=integer, opType=categorical}]
Output fields:
OutputField{name=probability(0), fieldName=probability(0), displayName=null, dataType=double, opType=continuous, finalResult=true, depth=1}
OutputField{name=probability(1), fieldName=probability(1), displayName=null, dataType=double, opType=continuous, finalResult=true, depth=1}
OutputField{name=probability(2), fieldName=probability(2), displayName=null, dataType=double, opType=continuous, finalResult=true, depth=1}
{_target=ProbabilityDistribution{result=0, probability_entries=[0=0.5635697635950065, 1=0.2180264776286243, 2=0.21840375877636908]}, probability(0)=0.5635697635950065, probability(1)=0.2180264776286243, probability(2)=0.21840375877636908}
{_target=0, probability(0)=0.5635697635950065, probability(1)=0.2180264776286243, probability(2)=0.21840375877636908}
4、Python调用解析PMML文件
GitHub:JPMML-Evaluator-Python
首先安装:pip install jpmml_evaluator pyjnius(下面的示例使用 PyJNIus 后端,需要额外安装 pyjnius,并且本机需有可用的 Java 运行环境)
from jpmml_evaluator.pyjnius import jnius_configure_classpath, PyJNIusBackend

# Configure the JVM classpath first, then create the PyJNIus backend.
jnius_configure_classpath()
backend = PyJNIusBackend()

from jpmml_evaluator import make_evaluator


def _field_names(fields):
    """Return the names of the given evaluator fields as a list."""
    names = []
    for field in fields:
        names.append(field.getName())
    return names


# Load the PMML model and keep the object returned by its self-check.
loaded_model = make_evaluator(backend, "lgb_model.pmml")
evaluator = loaded_model.verify()

# Print the model schema: input, target and output field names.
input_fields = evaluator.getInputFields()
print("\r\nInput fields: " + str(_field_names(input_fields)))

target_fields = evaluator.getTargetFields()
print("\r\nTarget field(s): " + str(_field_names(target_fields)))

output_fields = evaluator.getOutputFields()
print("\r\nOutput fields: " + str(_field_names(output_fields)))

# Evaluate a single record keyed by the PMML input field names.
args = {
    "sepal_length_(cm)": 5.1,
    "sepal_width_(cm)": 3.5,
    "petal_length_(cm)": 1.4,
    "petal_width_(cm)": 0.2,
}
result = evaluator.evaluate(args)
# Output recorded by the original author:
# {'_target': 0, 'probability(0)': 0.5635697635950065, 'probability(1)': 0.2180264776286243, 'probability(2)': 0.21840375877636908}
print('\r\n',result)