java集成kettle后,可通过java程序调用kettle转换文件、kettle任务,对数据进行抽取转换清洗。
整合步骤如下:
第一步声明必须的依赖
pom.xml文件如下:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.2.1.RELEASE</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>com.pingan.pase.cockpit</groupId>
<artifactId>train</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>train</name>
<description>Demo project for Spring Boot</description>
<properties>
<java.version>1.8</java.version>
<!-- <kettle.version>6.1.0.1-196</kettle.version>-->
<kettle.version>8.1.0.0-365</kettle.version>
<js.version>1.7R3</js.version>
<commons-codec.version>1.10</commons-codec.version>
<commons-beanutils.version>1.9.3</commons-beanutils.version>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>com.mzlion</groupId>
<artifactId>easy-okhttp</artifactId>
<version>1.0.0-Final</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.junit.vintage</groupId>
<artifactId>junit-vintage-engine</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.62</version>
</dependency>
<dependency>
<groupId>pentaho-kettle</groupId>
<artifactId>kettle-core</artifactId>
<version>${kettle.version}</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/kettle-core-8.1.0.0-365.jar</systemPath>
</dependency>
<dependency>
<groupId>com.verhas</groupId>
<artifactId>license3j</artifactId>
<!-- Version string must not carry trailing whitespace; some tools compare it verbatim. -->
<version>1.0.7</version>
</dependency>
<!-- <dependency>
<groupId>pentaho-kettle</groupId>
<artifactId>kettle-dbdialog</artifactId>
<version>${kettle.version}</version>
</dependency>-->
<dependency>
<groupId>pentaho-kettle</groupId>
<artifactId>kettle-engine</artifactId>
<version>${kettle.version}</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/kettle-engine-8.1.0.0-365.jar</systemPath>
</dependency>
<!--<dependency>
<groupId>pentaho</groupId>
<artifactId>pentaho-vfs-browser</artifactId>
<version>${kettle.version}</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/pentaho-vfs-browser-8.1.0.0-365</systemPath>
</dependency>-->
<dependency>
<groupId>pentaho</groupId>
<artifactId>metastore</artifactId>
<version>${kettle.version}</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/metastore-8.1.0.0-365.jar</systemPath>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-vfs2 -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-vfs2</artifactId>
<version>2.2</version>
</dependency>
<!--<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.10</version>
</dependency>-->
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
<!-- <dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.7</version>
</dependency>-->
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-codec/commons-codec -->
<!-- commons-codec is declared once, further down in this file, using ${commons-codec.version}.
     The second declaration that used to sit here was removed: Maven requires every
     groupId:artifactId pair to be unique inside <dependencies> and warns on duplicates. -->
<!-- https://mvnrepository.com/artifact/commons-lang/commons-lang -->
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.46</version>
</dependency>
<dependency>
<groupId>rhino</groupId>
<artifactId>js</artifactId>
<version>${js.version}</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/js-1.7R3.jar</systemPath>
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>commons-beanutils</groupId>
<artifactId>commons-beanutils</artifactId>
<version>${commons-beanutils.version}</version>
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>${commons-codec.version}</version>
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>17.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.javassist/javassist -->
<dependency>
<groupId>org.javassist</groupId>
<artifactId>javassist</artifactId>
<version>3.20.0-GA</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.2</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<!-- The Kettle, metastore and rhino jars are declared with <scope>system</scope>.
     System-scoped jars are left out of the repackaged executable jar unless this
     flag is enabled, which would make the packaged application fail at runtime. -->
<includeSystemScope>true</includeSystemScope>
</configuration>
</plugin>
</plugins>
</build>
<repositories>
<!-- <repository>
<id>pentaho-releases</id>
<name>kettle</name>
<url>https://nexus.pentaho.org/content/groups/omni/</url>
</repository>-->
<repository>
<id>public</id>
<name>aliyun nexus</name>
<!-- Use HTTPS: Maven 3.8.1 and later block plain-HTTP repositories by default. -->
<url>https://maven.aliyun.com/nexus/content/groups/public/</url>
</repository>
</repositories>
</project>
第二步编写集成kettle相关的类:
KettleUtil.java : kettle工具类
package util;
import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.util.EnvUtil;
import org.pentaho.di.job.Job;
import org.pentaho.di.job.JobMeta;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
/**
 * Static helpers for running Kettle (Pentaho Data Integration) transformation
 * and job files from Java code.
 */
public class KettleUtil {

    /**
     * Runs a transformation file without passing any arguments.
     *
     * @param transFileName path of the transformation file to run
     * @throws IllegalArgumentException if {@code transFileName} is null or blank
     * @throws Exception if Kettle fails to load or run the transformation, or if the
     *                   transformation finishes with a non-zero error count
     */
    public static void callNativeTrans(String transFileName) throws Exception {
        callNativeTransWithParams(null, transFileName);
    }

    /**
     * Runs a transformation file, handing {@code params} to {@code Trans.execute}.
     * The call blocks until the transformation has finished.
     *
     * @param params        arguments forwarded to the transformation; may be {@code null}
     * @param transFileName path of the transformation file to run
     * @throws IllegalArgumentException if {@code transFileName} is null or blank
     * @throws Exception if Kettle fails to load or run the transformation, or if the
     *                   transformation finishes with a non-zero error count
     */
    public static void callNativeTransWithParams(String[] params, String transFileName) throws Exception {
        // Fail fast with a clear message instead of an obscure error from inside Kettle.
        requireFileName(transFileName, "transFileName");

        // Initialise the Kettle runtime before any metadata is loaded.
        KettleEnvironment.init();
        EnvUtil.environmentInit();

        TransMeta transMeta = new TransMeta(transFileName);
        Trans trans = new Trans(transMeta);

        // Start the transformation, then block until every step has finished.
        trans.execute(params);
        trans.waitUntilFinished();

        int errors = trans.getErrors();
        if (errors > 0) {
            throw new Exception("There are errors during transformation exception!(传输过程中发生异常)"
                    + " [file=" + transFileName + ", errors=" + errors + "]");
        }
    }

    /**
     * Runs a job file and blocks until it has finished.
     *
     * @param jobName path of the job file to run
     * @throws IllegalArgumentException if {@code jobName} is null or blank
     * @throws Exception if Kettle fails to load or run the job, or if the job finishes
     *                   with a non-zero error count
     */
    public static void callNativeJob(String jobName) throws Exception {
        requireFileName(jobName, "jobName");

        // Initialise the Kettle runtime before any metadata is loaded.
        KettleEnvironment.init();

        // null repository arguments: the job definition is read from the given file name.
        JobMeta jobMeta = new JobMeta(jobName, null);
        Job job = new Job(null, jobMeta);

        // To pass a variable to the job script, call job.setVariable(name, value) here,
        // before start(); inside the script the value is read as ${name}.
        job.start();
        job.waitUntilFinished();

        int errors = job.getErrors();
        if (errors > 0) {
            throw new Exception("There are errors during job exception!(执行job发生异常)"
                    + " [file=" + jobName + ", errors=" + errors + "]");
        }
    }

    /** Rejects a null or blank file name with a descriptive {@link IllegalArgumentException}. */
    private static void requireFileName(String fileName, String argumentName) {
        if (fileName == null || fileName.trim().isEmpty()) {
            throw new IllegalArgumentException(argumentName + " must not be null or blank");
        }
    }
}
KettleTaskJob.java :kettle任务类
package util;
import lombok.extern.slf4j.Slf4j;
/**
 * Entry points that run the sample Kettle transformation / job through
 * {@link KettleUtil}, logging a marker line before and after each run.
 */
@Slf4j
public class KettleTaskJob {

    /** Marker logged before a run starts. */
    private static final String START_MESSAGE = "*****kettle定时任务运行开始******";

    /** Marker logged after a run has completed without throwing. */
    private static final String END_MESSAGE = "*****kettle定时任务运行结束******";

    /** Fixed location of the sample transformation used by {@link #run()}. */
    private static final String DEFAULT_TRANS_FILE = "D:/01develop/06pdi-ce-8.1.0.0-365/ktr/goods.ktr";

    /** Fixed location of the sample job used by {@link #runJob()}. */
    private static final String DEFAULT_JOB_FILE = "D:/01develop/06pdi-ce-8.1.0.0-365/job/goods.kjb";

    /**
     * Runs the sample transformation from its fixed file-system location.
     *
     * @throws Exception if the transformation cannot be run or reports errors
     */
    public void run() throws Exception {
        log.info(START_MESSAGE);
        KettleUtil.callNativeTrans(DEFAULT_TRANS_FILE);
        log.info(END_MESSAGE);
    }

    /**
     * Runs a transformation that is looked up as a classpath resource.
     *
     * @param transFileName resource name of the transformation file, e.g. {@code /ktr/goods.ktr}
     * @throws Exception if the transformation cannot be run or reports errors
     */
    public void runTrans(String transFileName) throws Exception {
        log.info(START_MESSAGE);
        // Turn the resource name into a path before handing it to Kettle.
        KettleUtil.callNativeTrans(FileUtil.getResourceFilePath(transFileName));
        log.info(END_MESSAGE);
    }

    /**
     * Runs the sample job from its fixed file-system location.
     *
     * @throws Exception if the job cannot be run or reports errors
     */
    public void runJob() throws Exception {
        log.info(START_MESSAGE);
        KettleUtil.callNativeJob(DEFAULT_JOB_FILE);
        log.info(END_MESSAGE);
    }

    /**
     * Manual smoke test: runs the transformation bundled at {@code /ktr/goods.ktr}.
     * Call {@code runJob()} on the instance instead to exercise the sample job.
     */
    public static void main(String[] args) throws Exception {
        KettleTaskJob task = new KettleTaskJob();
        task.runTrans("/ktr/goods.ktr");
    }
}
FileUtil.java 文件工具类
package util;
import java.net.URL;
/**
 * Helper for locating files that are available as classpath resources.
 */
public class FileUtil {

    /**
     * Resolves a classpath resource to the path component of its URL.
     *
     * <p>For {@code file:} URLs the path is returned percent-decoded, so resources
     * stored under directories whose names contain spaces or non-ASCII characters
     * yield a path that exists on disk. For any other protocol the raw URL path is
     * returned unchanged.
     *
     * @param fullFileName resource name as accepted by {@link Class#getResource(String)};
     *                     a leading {@code /} makes it absolute on the classpath
     * @return the path component of the resource URL
     * @throws IllegalArgumentException if {@code fullFileName} is null or the resource
     *                                  cannot be found
     */
    public static String getResourceFilePath(String fullFileName) {
        if (fullFileName == null) {
            throw new IllegalArgumentException("fullFileName must not be null");
        }
        URL url = FileUtil.class.getResource(fullFileName);
        if (url == null) {
            // getResource signals "not found" with null; report it instead of an NPE.
            throw new IllegalArgumentException("Resource not found on classpath: " + fullFileName);
        }
        if ("file".equals(url.getProtocol())) {
            try {
                // URI.getPath() decodes escapes such as %20; URL.getPath() does not.
                String decodedPath = url.toURI().getPath();
                if (decodedPath != null) {
                    return decodedPath;
                }
            } catch (java.net.URISyntaxException ignored) {
                // The URL is not a strictly valid URI; fall back to the raw path below.
            }
        }
        return url.getPath();
    }
}
由于kettle依赖的几个核心jar包不在maven中央仓库中,如果需要通过maven下载这些依赖,就必须在pom.xml中声明kettle依赖所在的仓库地址:
<repository>
<id>pentaho-releases</id>
<name>kettle</name>
<url>https://nexus.pentaho.org/content/groups/omni/</url>
</repository>
但是从此仓库地址下载依赖会很慢,甚至可能下载失败,所以本项目的pom.xml通过引入本地依赖的方式将相关的依赖引入项目中:
1、先将相关依赖下载到本地,然后将依赖拷贝到项目根目录下的lib目录(与下面systemPath中的${project.basedir}/lib相对应);
2、通过maven的system作用域和systemPath进行本地声明,将依赖引入项目中,如下所示:
<dependency>
<groupId>pentaho-kettle</groupId>
<artifactId>kettle-core</artifactId>
<version>${kettle.version}</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/kettle-core-8.1.0.0-365.jar</systemPath>
</dependency>
相关依赖下载地址: java整合kettle所需jar包_java整合kettle-Java代码类资源-CSDN下载
项目源码下载地址:springboot整合kettle项目源码_springboot整合kettle,kettlespringboot-Java代码类资源-CSDN下载