java代码整合kettle9.3实现读取表中的数据,生成excel文件
1.简介
本次使用java代码整合kettle9.3版本,数据库使用mysql。
2.jar包导入
项目需要依赖部分kettle中的jar包,请将这部分jar包自行导入maven仓库。
<dependency>
<groupId>com.kettle</groupId>
<artifactId>kettle-engine</artifactId>
<version>9.3</version>
</dependency>
<dependency>
<groupId>com.kettle</groupId>
<artifactId>metastore</artifactId>
<version>9.3</version>
</dependency>
<dependency>
<groupId>com.kettle</groupId>
<artifactId>kettle-core</artifactId>
<version>9.3</version>
</dependency>
<dependency>
<groupId>com.kettle</groupId>
<artifactId>pentaho-encryption-support</artifactId>
<version>9.3</version>
</dependency>
<dependency>
<groupId>com.kettle</groupId>
<artifactId>monetdb-jdbc</artifactId>
<version>2.8</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-vfs2</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>com.kettle</groupId>
<artifactId>jxl</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>com.kettle</groupId>
<artifactId>commons-dbcp</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>com.kettle</groupId>
<artifactId>commons-pool</artifactId>
<version>1.5</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.47</version>
<scope>runtime</scope>
</dependency>
可以通过maven命令的形式导入:
mvn install:install-file -Dfile=jar包地址 -DgroupId=jar包分组id -DartifactId=jar包名称id -Dversion=jar包版本 -Dpackaging=jar
如:
mvn install:install-file -Dfile=D:\worktool\kettle\pdi-ce-7.1.0.0-12\data-integration\lib\kettle-core-9.3.0.0-12.jar -DgroupId=com.kettle -DartifactId=kettle-core -Dversion=9.3 -Dpackaging=jar
3.项目所需xml配置
项目整合kettle启动时会爆需要kettle-password-encoder-plugins.xml文件。
kettle-password-encoder-plugins.xml
<password-encoder-plugins>
<password-encoder-plugin id="Kettle">
<description>Kettle Password Encoder</description>
<classname>org.pentaho.di.core.encryption.KettleTwoWayPasswordEncoder</classname>
</password-encoder-plugin>
</password-encoder-plugins>
mysql的xml配置文件
用的时候需要改成自己的数据库配置,name要和代码中的kettle数据库保持一致。
<?xml version="1.0" encoding="UTF-8"?>
<connection>
<!--数据库名称-->
<name>kettle</name>
<!--连接地址-->
<server>127.0.0.1</server>
<!--数据库类型-->
<type>Mysql</type>
<access>Native</access>
<!--连接的库的名称-->
<database>aweb</database>
<!--端口-->
<port>3306</port>
<!--账号-->
<username>root</username>
<!--密码-->
<password>root</password>
<attributes>
<attribute>
<code>USE_POOLING</code>
<attribute>Y</attribute>
</attribute>
<attribute>
<code>EXTRA_OPTION_MYSQL.characterEncoding</code>
<attribute>utf8</attribute>
</attribute>
<attribute>
<code>EXTRA_OPTION_MYSQL.defaultFetchSize</code>
<attribute>500</attribute>
</attribute>
<attribute>
<code>EXTRA_OPTION_MYSQL.serverTimeZone</code>
<attribute>UTC</attribute>
</attribute>
<attribute>
<code>serverTimeZone</code>
<attribute>UTC</attribute>
</attribute>
<attribute>
<code>serverTimezone</code>
<attribute>UTC</attribute>
</attribute>
</attributes>
</connection>
4.java代码
package com.kettle.utils;
import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.plugins.PluginRegistry;
import javax.servlet.http.HttpServletRequest;
/**
* kettle配置
*/
public class KettleConfig {
/**
* 运行环境初始化
*
* @param request HttpServletRequest
* @throws KettleException 异常
*/
public static void initKettleEnvironment(HttpServletRequest request) throws KettleException {
if (KettleEnvironment.isInitialized()) {
return;
}
if (request == null) {
KettleEnvironment.init();
}
}
/**
* 插件注册,用于注册转换中需要用到的插件
*
* @return PluginRegistry
*/
public static PluginRegistry getRegistry() {
return PluginRegistry.getInstance();
}
}
package com.kettle.utils;
import org.pentaho.di.core.database.Database;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleXMLException;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransHopMeta;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepMeta;
/**
* kettle转换操作
*/
public class KettleTransformation {
/**
* 构建TransMeta转换
*
* @param metaName 名称
* @param transXML 数据库配置xml
* @return TransMeta
* @throws KettleXMLException 异常
*/
public static TransMeta buildTransMeta(String metaName, String... transXML) throws KettleXMLException {
TransMeta transMeta = new TransMeta();
// 设置转化元的名称
transMeta.setName(metaName);
// 添加转换的数据库连接
for (int i = 0; i < transXML.length; i++) {
transMeta.addDatabase(new DatabaseMeta(transXML[i]));
}
return transMeta;
}
/**
* 用于将表输入步骤与第二步骤绑定
*
* @param transMeta TransMeta
* @param from 第一个步骤
* @param to 第二个步骤
*/
public static void addTransHop(TransMeta transMeta, StepMeta from, StepMeta to) {
transMeta.addTransHop(new TransHopMeta(from, to));
}
/**
* 执行抽取
*
* @param transMeta TransMeta
* @param targetDbName 数据库名
*/
public static void executeTrans(TransMeta transMeta, String targetDbName) {
try {
Database database = new Database(null, transMeta.findDatabase(targetDbName));
database.connect();
Trans trans = new Trans(transMeta);
trans.execute(new String[]{"com.kettle.start..."});
trans.waitUntilFinished();
// 关闭数据库连接
database.disconnect();
if (trans.getErrors() > 0) {
System.out.println(trans.getErrors());
}
} catch (KettleException e) {
e.printStackTrace();
}
}
}
package com.kettle.utils;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.plugins.PluginRegistry;
import org.pentaho.di.core.plugins.StepPluginType;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.steps.exceloutput.ExcelField;
import org.pentaho.di.trans.steps.exceloutput.ExcelOutputMeta;
import org.pentaho.di.trans.steps.tableinput.TableInputMeta;
/**
* kettle步骤
*/
public class KettleStep {
/**
* 设置表输入步骤
*
* @param transMeta TransMeta
* @param registry PluginRegistry
* @param sourceDbName 数据库名
* @param sql sql
* @param stepName 步骤名
* @return StepMeta
*/
public static StepMeta setTableInputStep(TransMeta transMeta, PluginRegistry registry, String sourceDbName, String sql,
String stepName) {
// 创建表输入
TableInputMeta tableInputMeta = new TableInputMeta();
String pluginId = registry.getPluginId(StepPluginType.class, tableInputMeta);
// 指定数据源数据库配置名
DatabaseMeta source = transMeta.findDatabase(sourceDbName);
tableInputMeta.setDatabaseMeta(source);
tableInputMeta.setSQL(sql);
// 将表输入添加到转换中
StepMeta stepMeta = new StepMeta(pluginId, stepName, tableInputMeta);
// 将表输入添加到步骤中
transMeta.addStep(stepMeta);
return stepMeta;
}
/**
* 设置Excel输出步骤
*
* @param transMeta TransMeta
* @param registry PluginRegistry
* @param stepName 步骤名
* @param fileName 文件名
* @return StepMeta
*/
public static StepMeta setExcelOutput(TransMeta transMeta, PluginRegistry registry, String stepName, String fileName) {
// 创建表输出
ExcelOutputMeta excelOutputMeta = new ExcelOutputMeta();
String pluginId = registry.getPluginId(StepPluginType.class, excelOutputMeta);
excelOutputMeta.setHeaderEnabled(true);
excelOutputMeta.setFooterEnabled(false);
excelOutputMeta.setAppend(false);
excelOutputMeta.setFileName(fileName);
excelOutputMeta.setDoNotOpenNewFileInit(false);
excelOutputMeta.setCreateParentFolder(false);
excelOutputMeta.setOutputFields(new ExcelField[]{});
// 将表输出添加到转换中
StepMeta stepMeta = new StepMeta(pluginId, stepName, excelOutputMeta);
// 添加到步骤中
transMeta.addStep(stepMeta);
return stepMeta;
}
}
package com.kettle.utils;
import com.kettle.common.enums.Kettle;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.plugins.PluginRegistry;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepMeta;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
@Component
public class KettleClient {
private String databaseName = "kettle";
/**
* 读取文件内容
*
* @param path 文件地址
* @return 文件中的内容
*/
public String getDatabase(String path) {
try {
Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new File(path));
Transformer t = TransformerFactory.newInstance().newTransformer();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
t.transform(new DOMSource(document), new StreamResult(bos));
return bos.toString();
} catch (SAXException | IOException | TransformerException | ParserConfigurationException e) {
e.printStackTrace();
}
return "";
}
public void getExcel(String sql) {
try {
KettleConfig.initKettleEnvironment(null);
String fileName = "D:/workProject/kettle/src/main/resources/excel/text.xls";
String transXML = getDatabase("src/main/resources/jdbc/mysql.xml");
TransMeta meta = KettleTransformation.buildTransMeta(databaseName, transXML);
PluginRegistry registry = KettleConfig.getRegistry();
StepMeta step1 = KettleStep.setTableInputStep(meta, registry, databaseName, sql, "table input");
StepMeta step2 = KettleStep.setExcelOutput(meta, registry, "excel out", fileName);
KettleTransformation.addTransHop(meta, step1, step2);
KettleTransformation.executeTrans(meta, databaseName);
} catch (KettleException e) {
e.printStackTrace();
}
}
}
注意kettle数据库的名称要和xml中配置的一样。
整理借鉴了很多大佬写的,在此无法一一说明,这只是个人用来查漏补缺的文章,如果对你有帮助我很高兴。