java集成kettle 实现资源库作业和转换的创建和调用

java集成kettle 实现资源库作业和转换的创建和调用

kettle 8.2版本 spring boot 2.16版本 maven仓库 3.6版本
maven仓库必须使用kettle官方提供的settings.xml 国内源下载不到

官方settings.xml

<?xml version="1.0" encoding="UTF-8"?>
<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">

  <!-- This is the recommended settings.xml for development of Hitachi Vantara projects. -->

  <!--
  If your wish to mirror everything through pentaho-public's repo uncomment bellow. Not recommended
  for external developers.
  -->
  <!--
  <mirrors>
    <mirror>
      <id>pentaho-public</id>
      <url>http://nexus.pentaho.org/content/groups/omni</url>
      <mirrorOf>*</mirrorOf>
    </mirror>
  </mirrors>
  -->
	<!--下载的依赖存放地址,自己决定-->
  <localRepository>F:\apache-maven-3.5.2\repository-3.5.2</localRepository>		

  <!--
  You might want to tweak the 'updatePolicy' configuration to fit your need on having updated snapshots and
  releases. Our recommendation is to set it to 'never' and run maven with the '-U' flag when needed.
  -->
  <profiles>
    <profile>
      <id>pentaho</id>
      <activation>
        <activeByDefault>true</activeByDefault>
      </activation>
      <repositories>
        <repository>
          <id>pentaho-public</id>
          <name>Pentaho Public</name>
          <url>http://nexus.pentaho.org/content/groups/omni</url>
          <releases>
            <enabled>true</enabled>
            <updatePolicy>always</updatePolicy>
          </releases>
          <snapshots>
            <enabled>true</enabled>
            <updatePolicy>always</updatePolicy>
          </snapshots>
        </repository>
      </repositories>
      <pluginRepositories>
        <pluginRepository>
          <id>pentaho-public</id>
          <name>Pentaho Public</name>
          <url>http://nexus.pentaho.org/content/groups/omni</url>
          <releases>
            <enabled>true</enabled>
            <updatePolicy>always</updatePolicy>
          </releases>
          <snapshots>
            <enabled>true</enabled>
            <updatePolicy>always</updatePolicy>
          </snapshots>
        </pluginRepository>
      </pluginRepositories>
    </profile>
  </profiles>

  <!-- this lets you call plugins from these groups in their short form -->
  <pluginGroups>
    <pluginGroup>org.pentaho.maven.plugins</pluginGroup>
    <pluginGroup>com.pentaho.maven.plugins</pluginGroup>
    <pluginGroup>com.github.spotbugs</pluginGroup>
  </pluginGroups>
</settings>

项目pom依赖

  <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.1.6.RELEASE</version>
        <relativePath/>
    </parent>
    <properties>
        <maven.compiler.encoding>UTF-8</maven.compiler.encoding>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <poi.version>3.17</poi.version>
        <jackson.version>2.9.5</jackson.version>
        <kettle.version>8.2.0.0-SNAPSHOT</kettle.version>
        <mysql.version>5.1.48</mysql.version>
    </properties>

    <dependencies>

        <!--kettle dependency start -->
        <dependency>
            <groupId>pentaho-kettle</groupId>
            <artifactId>kettle-core</artifactId>
            <version>${kettle.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.javassist</groupId>
                    <artifactId>javassist</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>pentaho-kettle</groupId>
            <artifactId>kettle-dbdialog</artifactId>
            <version>${kettle.version}</version>
        </dependency>
        <dependency>
            <groupId>pentaho-kettle</groupId>
            <artifactId>kettle-engine</artifactId>
            <version>${kettle.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>javax.servlet</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.hibernate</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>pentaho-kettle</groupId>
            <artifactId>kettle-ui-swt</artifactId>
            <version>${kettle.version}</version>
        </dependency>
        <dependency>
            <groupId>pentaho</groupId>
            <artifactId>metastore</artifactId>
            <version>${kettle.version}</version>
        </dependency>
        <dependency>
            <groupId>pentaho</groupId>
            <artifactId>pentaho-vfs-browser</artifactId>
            <version>${kettle.version}</version>
        </dependency>

        <dependency>
            <groupId>org.pentaho.reporting.library</groupId>
            <artifactId>libbase</artifactId>
            <version>${kettle.version}</version>
        </dependency>

        <dependency>
            <groupId>org.pentaho.reporting.library</groupId>
            <artifactId>libxml</artifactId>
            <version>${kettle.version}</version>
        </dependency>

        <!--kettle dependency end -->
        <!-- jackson json -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-core</artifactId>
            <version>${jackson.version}</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.58</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.2</version>
        </dependency>

        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-vfs2</artifactId>
            <version>2.2</version>
        </dependency>
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>17.0</version>
        </dependency>

        <dependency>
            <groupId>commons-codec</groupId>
            <artifactId>commons-codec</artifactId>
            <version>1.9</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.version}</version>
        </dependency>
        <dependency>
            <groupId>net.sourceforge.jexcelapi</groupId>
            <artifactId>jxl</artifactId>
            <version>2.6.12</version>
        </dependency>
        <dependency>
            <groupId>org.apache.xmlgraphics</groupId>
            <artifactId>xmlgraphics-commons</artifactId>
            <version>1.5</version>
        </dependency>

        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>${poi.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>${poi.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>${poi.version}</version>
        </dependency>


        <dependency>
            <groupId>commons-lang</groupId>
            <artifactId>commons-lang</artifactId>
            <version>2.6</version>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter</artifactId>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
        </dependency>

        <dependency>
            <groupId>com.vladmihalcea</groupId>
            <artifactId>hibernate-types-5</artifactId>
            <version>2.4.2</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>1.1.15</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
            <exclusions>
                <exclusion>
                    <groupId>org.springframework.boot</groupId>
                    <artifactId>spring-boot-starter-logging</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-redis</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-log4j2</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-jpa</artifactId>
        </dependency>

<!--        <dependency>-->
<!--            <groupId>org.projectlombok</groupId>-->
<!--            <artifactId>lombok</artifactId>-->
<!--            <version>1.16.20</version>-->
<!--            <scope>provided</scope>-->
<!--        </dependency>-->
    </dependencies>

代码部分

import org.junit.Test;
import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.repository.ObjectId;
import org.pentaho.di.repository.kdr.KettleDatabaseRepository;
import org.pentaho.di.repository.kdr.KettleDatabaseRepositoryMeta;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransHopMeta;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.steps.tableinput.TableInputMeta;
import org.pentaho.di.trans.steps.tableoutput.TableOutputMeta;

/**
 * @author : zgc
 * @date : 2020/3/27
 * @versions : 1.0
 * @content
 */
public class kettleDey1 {

    /**
     * kettle 数据库连接初始化
     *
     * @throws KettleException
     */
    public static KettleDatabaseRepository repositoryConnect() throws KettleException {
        if (!KettleEnvironment.isInitialized()) {
            try {
                KettleEnvironment.init();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        /*
        数据库元连接
        参数介绍
        kettle资源库名称,数据库类型,连接类型,ip,数据库名称,端口,用户名,密码

         */
        DatabaseMeta databaseMeta = new DatabaseMeta("kettle_dev", "mysql", "Native(JDBC)", "127.0.0.1", "kettle_dev?useUnicode=true&characterEncoding=utf-8&useSSL=false",
                "3306", "root", "root");

        //数据库形式的资源库元对象
        KettleDatabaseRepositoryMeta repository = new KettleDatabaseRepositoryMeta();
        //保存数据库连接至元对象种
        repository.setConnection(databaseMeta);
        //数据库形式的资源库对象
        KettleDatabaseRepository databaseRepository = new KettleDatabaseRepository();
        //用资源库元对象初始化资源库对象
        databaseRepository.init(repository);
        //连接到资源库 默认为admin admin
        databaseRepository.connect("admin", "admin");
        //判断状态为false | true
        if (databaseRepository.isConnected()) {
            System.out.println("连接成功,数据库名称为:" + databaseRepository.getDatabaseMeta());
            return databaseRepository;
        } else {
            System.out.println("连接失败,数据库名称为:" + databaseRepository.getDatabaseMeta());
            return null;
        }
    }

    /**
     * 创建资源库作业或转换并执行
     * 实例:创建转换 并实现表输入表输出步骤
     * r_user 是kettle资源库自带的表,创建一个相同的表结构为t_user 进行测试数据
     */
    @Test
    public void demo1() throws KettleException {
        //获取资源库连接对象
        KettleDatabaseRepository repository = repositoryConnect();
        //创建转换对象
        TransMeta transMeta = new TransMeta();
        //转换的元资源库对象
        transMeta.setRepository(repository);
        //转换的名称
        transMeta.setName("transDey1");
        //创建转换保存的位置
        transMeta.setRepositoryDirectory(repository.findDirectory("/"));
        //为转换添加一个数据源
        transMeta.addDatabase(repository.getDatabaseMeta());

        //表输出组件
        TableInputMeta inputMeta = new TableInputMeta();
        //设定默认值
        inputMeta.setDefault();
        //为该步骤设置数据源
        inputMeta.setDatabaseMeta(transMeta.findDatabase("kettle_dev"));
        //表输入sql语句
        inputMeta.setSQL("select * from r_user");
        //创建步骤实例,把组件放入步骤中
        StepMeta stepMeta1 = new StepMeta("步骤1-表输入", inputMeta);
        //允许绘制,展示效果
        stepMeta1.setDraw(true);
        //在页面显示的位置
        stepMeta1.setLocation(100,200);
        //把步骤添加到作业中
        transMeta.addStep(stepMeta1);

        //表输出组件 参数和上方同理
        TableOutputMeta outputMeta = new TableOutputMeta();
        outputMeta.setDefault();
        outputMeta.setDatabaseMeta(transMeta.findDatabase("kettle_dev"));
        outputMeta.setTableName("t_user");
        StepMeta stepMeta2 = new StepMeta("步骤2-表输出", outputMeta);
        stepMeta2.setDraw(true);
        stepMeta2.setLocation(200,300);
        transMeta.addStep(stepMeta2);

        //连接对象,把步骤1和步骤2进行关联
        TransHopMeta transHopMeta = new TransHopMeta(stepMeta1, stepMeta2);
        //添加到转换中
        transMeta.addTransHop(transHopMeta);

        //保存该转换到资源库
        repository.save(transMeta, null);
        //把转换放入执行对象中  job同理 new Job() 传入对象即可
        Trans trans = new Trans(transMeta);
        //开始执行
        trans.execute(null);
        //等待程序执行完毕
        trans.waitUntilFinished();

    }

    /**
     * 调用资源库作业或转换并执行
     * 实例:调用转换
     */
    @Test
    public void demo2() throws KettleException {
        //获取资源库连接对象
        KettleDatabaseRepository repository = repositoryConnect();
        //调用资源库名为kettle1的转换,从/目录开始查找
        //调用job同理,repository.getJobId()即可,参数相同
        //返回的objectId就是当前你调用的转换/作业的 ID
        ObjectId objectId = repository.getTransformationID("transDey1", repository.findDirectory("/"));
        //从资源库加载转换,传入查到的ID序号并返回一个转换对象
        //加载作业同理,repository.loadJob(objectId, null)
        TransMeta transMeta = repository.loadTransformation(objectId, null);
        //保存该转换到资源库
        repository.save(transMeta, null);
        //把转换放入执行对象中  job同理 new Job() 传入对象即可
        Trans trans = new Trans(transMeta);
        //开始执行
        trans.execute(null);
        //等待程序执行完毕
        trans.waitUntilFinished();
    }

}

在这里插入图片描述

在这里插入图片描述
在这里插入图片描述
至此,一个简单的表输出表输出组件就完成了

  • 1
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
Kettle 是一款开源的 ETL 工具,提供了丰富的 API,可以通过 Java 调用 Kettle API 实现数据同步。下面是一个简单的示例: 1. 导入 Kettle 依赖包 Kettle 的依赖包可以从官网下载,也可以通过 Maven 引入。在 pom.xml 文件中添加以下依赖: ```xml <dependency> <groupId>org.pentaho</groupId> <artifactId>kettle-core</artifactId> <version>9.1.0.0-324</version> </dependency> ``` 2. 创建 Kettle 连接 使用 Kettle API 前需要创建一个 Kettle 连接,可以通过以下代码创建: ```java KettleEnvironment.init(); DatabaseMeta databaseMeta = new DatabaseMeta("mysql", "mysql", "Native (JDBC)", "localhost", "3306", "test", "root", "password"); Database database = new Database(null, databaseMeta); database.connect(); ``` 其中,`DatabaseMeta` 是数据库连接的元数据,包括数据库类型、主机名、端口号、数据库名、用户名和密码等信息;`Database` 是数据库连接对象,通过 `connect()` 方法连接数据库。 3. 创建转换并执行 Kettle 中的数据同步是通过转换(Transformation)实现的,转换包含了数据源、目标、转换规则等信息。可以通过以下代码创建转换并执行: ```java TransMeta transMeta = new TransMeta("path/to/transformation.ktr"); Trans trans = new Trans(transMeta); trans.execute(null); trans.waitUntilFinished(); if (trans.getErrors() > 0) { throw new RuntimeException("Transformation failed with errors!"); } ``` 其中,`TransMeta` 是转换元数据,包含了转换的各种信息,如输入输出、转换步骤等;`Trans` 是转换对象,通过 `execute()` 方法执行转换,`waitUntilFinished()` 方法等待转换完成,`getErrors()` 方法获取转换过程中的错误数量。 以上是基本的 Kettle API 调用流程,具体的实现还需要根据实际情况进行调整。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值