1. 将项目拉到本地
git clone git@github.com:jpmml/jpmml-evaluator-hive.git
2. 进到目录中安装
mvn clean install
3. 将构建生成的 `jar` 包中带 runtime 的那个上传到 HDFS
hdfs dfs -put jpmml-evaluator-hive-runtime-1.0-SNAPSHOT.jar somedir/
4. 在hive中加载
-- Add the jpmml-evaluator-hive runtime jar (uploaded to HDFS above) to the Hive session.
add jar {hdfs_home}/somedir/jpmml-evaluator-hive-runtime-1.0-SNAPSHOT.jar;
-- Register the archive builder class as a temporary function named BuildArchive.
CREATE TEMPORARY FUNCTION BuildArchive AS 'org.jpmml.evaluator.hive.ArchiveBuilderUDF';
-- Show the short and the extended description of the newly registered function.
DESCRIBE FUNCTION BuildArchive;
DESCRIBE FUNCTION EXTENDED BuildArchive;
5. 处理pmml
将 pmml 文件中 Output 字段名 `probability(1)`、`probability(0)` 里的括号替换掉(例如改为 `probability_1`、`probability_0`),否则 hive 无法识别这些字段名
6. 生成模型jar
三个参数分别是:生成的 UDF 的全限定类名(包名 + 类名)、pmml 文件的本地路径、输出 jar 包的本地路径
-- Build the model jar: arguments are the UDF class name, the local PMML file path,
-- and the local path where the generated jar is written.
SELECT BuildArchive('com.mycompany.XGBPredictor', '/home/model/xgboost_model.pmml', '/home/model/XGBPredictor.jar');
7. 将jar包放到HDFS
hdfs dfs -put XGBPredictor.jar somedir/
8. 加载包
-- Add the generated model jar (uploaded to HDFS above) to the Hive session.
add jar {hdfs_home}/somedir/XGBPredictor.jar;
-- Register the generated class as a temporary function named XGBPredictor.
CREATE TEMPORARY FUNCTION XGBPredictor AS 'com.mycompany.XGBPredictor';
-- Describe the function that was just registered; the name must match the
-- CREATE TEMPORARY FUNCTION statement, otherwise Hive reports that the function does not exist.
DESCRIBE FUNCTION XGBPredictor;
DESCRIBE FUNCTION EXTENDED XGBPredictor;
9. 更改执行计划的序列化(serialization)格式,不支持 kryo
set hive.plan.serializati