目前正在做的项目需要将主业务库的部分数据同步出去以供统计使用,为了保持数据及时同步,需要定时执行同步操作。同步工具采用了比较流行的ETL工具kettle:首先在kettle的界面工具Spoon中设计好转换文件(如 studentTableETL.ktr),然后在web项目中通过quartz定时调用 studentTableETL.ktr 即可。
1、pom.xml
2、spring-quartz.xml
3、KettleLoader.java
1、pom.xml
<!-- Kettle libraries, all pinned to the same release (3.2.0-GA). -->
<dependency>
<groupId>pentaho.kettle</groupId>
<artifactId>kettle-core</artifactId>
<version>3.2.0-GA</version>
</dependency>
<dependency>
<groupId>pentaho.kettle</groupId>
<artifactId>kettle-db</artifactId>
<version>3.2.0-GA</version>
</dependency>
<dependency>
<groupId>pentaho.kettle</groupId>
<artifactId>kettle-engine</artifactId>
<version>3.2.0-GA</version>
</dependency>
<!-- NOTE(review): UI module; confirm it is really needed when transformations are only run from the web application. -->
<dependency>
<groupId>pentaho.kettle</groupId>
<artifactId>kettle-ui-swt</artifactId>
<version>3.2.0-GA</version>
</dependency>
<dependency>
<groupId>pentaho.kettle</groupId>
<artifactId>kettle-vfs</artifactId>
<version>3.2.0-GA</version>
</dependency>
<!-- Supporting libraries. -->
<dependency>
<groupId>ognl</groupId>
<artifactId>ognl</artifactId>
<version>3.0.3</version>
</dependency>
<!-- quartz and log4j declare no version here; presumably it is managed elsewhere (e.g. a parent POM). TODO confirm. -->
<dependency>
<groupId>org.quartz-scheduler</groupId>
<artifactId>quartz</artifactId>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</dependency>
<dependency>
<groupId>commons-digester</groupId>
<artifactId>commons-digester</artifactId>
<version>1.8</version>
</dependency>
2、spring-quartz.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN 2.0//EN" "http://www.springframework.org/dtd/spring-beans-2.0.dtd">
<beans>
    <!-- Scheduler: starts automatically and registers the triggers listed below. -->
    <bean id="myScheduler" class="org.springframework.scheduling.quartz.SchedulerFactoryBean">
        <property name="autoStartup" value="true"/>
        <property name="triggers">
            <!-- More than one trigger may be listed here. -->
            <list>
                <ref bean="kettleTrigger"/>
            </list>
        </property>
    </bean>

    <!-- Trigger: binds the job detail to a cron schedule. -->
    <bean id="kettleTrigger" class="org.springframework.scheduling.quartz.CronTriggerFactoryBean">
        <property name="jobDetail" ref="kettleDetail"/>
        <!-- Fires once a day at 23:59. To fire once every minute use: 0 * * * * ? -->
        <property name="cronExpression" value="0 59 23 * * ?"/>
    </bean>

    <!-- Job detail: invokes kettleEtl on the kettleTimer bean. -->
    <bean id="kettleDetail" class="org.springframework.scheduling.quartz.MethodInvokingJobDetailFactoryBean">
        <property name="targetObject" ref="kettleTimer"/>
        <property name="targetMethod" value="kettleEtl"/>
        <!-- Prevent concurrent executions of the job. -->
        <property name="concurrent" value="false"/>
    </bean>

    <!-- Job target: the Kettle ETL loader class. -->
    <bean id="kettleTimer" class="com.anyuan.edu.sas.service.KettleLoader"/>
</beans>
3、KettleLoader.java
package com.anyuan.edu.sas.service;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.util.EnvUtil;
import org.pentaho.di.trans.StepLoader;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
/**
 * Loads and runs Kettle transformation (.ktr) files.
 *
 * <p>{@link #kettleEtl()} is the entry point referenced by the scheduler configuration; it runs
 * a fixed list of transformations one after another.
 *
 * @author gaozr
 */
public class KettleLoader {

    /**
     * Runs every configured transformation in order.
     *
     * <p>Processing stops at the first transformation that fails. The failure is reported on
     * standard output together with its cause and is not rethrown, so the scheduled job itself
     * does not fail. "ok" is printed only when every transformation completed without errors.
     *
     * @throws KettleException declared for interface compatibility; failures are reported on
     *         standard output rather than propagated
     */
    public void kettleEtl() throws KettleException {
        String[] etlTables = {"classroomTableETL", "studentTableETL", "courseTableETL", "chapterTableETL"};
        for (String table : etlTables) {
            // NOTE(review): relative path; presumably resolved against the working directory
            // of the running application. Confirm for the deployment environment.
            String filename = "kettlefile/" + table + ".ktr";
            try {
                callNativeTransWithParam(filename);
            } catch (Exception e) {
                // Report which table failed and why, and do not claim success afterwards.
                System.out.println(table + " failed: " + e);
                return;
            }
        }
        System.out.println("ok");
    }

    /**
     * Runs a local transformation file with an empty argument list and waits for it to finish.
     *
     * @param transFileName path of the .ktr transformation file to run
     * @throws KettleException if the transformation cannot be loaded or started, or if it
     *         finishes with one or more errors
     */
    public static void callNativeTransWithParam(String transFileName) throws KettleException {
        // Initialise the Kettle environment and the step loader.
        EnvUtil.environmentInit();
        StepLoader.init();

        // Load the transformation definition and create a runnable transformation from it.
        TransMeta transMeta = new TransMeta(transFileName);
        Trans trans = new Trans(transMeta);

        // Run with no arguments and block until all steps are done.
        String[] params = {};
        trans.execute(params);
        trans.waitUntilFinished();

        // Step failures are only visible through the error counter, so check it explicitly
        // instead of treating every finished run as a success.
        int errors = trans.getErrors();
        if (errors > 0) {
            throw new KettleException(
                    "Transformation " + transFileName + " finished with " + errors + " error(s)");
        }
    }
}
我这里每次要执行4个ktr文件,所以定义了一个数组 String[] etlTables = {"classroomTableETL","studentTableETL","courseTableETL","chapterTableETL"},然后循环调用 callNativeTransWithParam 逐个执行。
如果只有一个文件的话,就不需要这个循环了。
以下为项目结构