Big Data Technology: A DataX Configuration File Generator

Table of Contents

1. Code
1.1 Create a Maven Project
1.2 Add the Following Dependencies
1.3 Create the com.atguigu.datax.beans.Column Class
1.4 Create the com.atguigu.datax.beans.Table Class
1.5 Create the com.atguigu.datax.configuration.Configuration Class
1.6 Create the com.atguigu.datax.helper.DataxJsonHelper Class
1.7 Create the com.atguigu.datax.helper.MysqlHelper Class
1.8 Create the com.atguigu.datax.Main Class
2. How to Use
2.1 Edit the Configuration File
2.2 Run


1. Code

1.1 Create a Maven Project

1.2 Add the Following Dependencies

<properties>	
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<dependencies>
    <dependency>
        <groupId>cn.hutool</groupId>
        <artifactId>hutool-json</artifactId>
        <version>5.8.11</version>
    </dependency>
    <dependency>
        <groupId>cn.hutool</groupId>
        <artifactId>hutool-db</artifactId>
        <version>5.8.11</version>
    </dependency>
    <dependency>
        <groupId>com.mysql</groupId>
        <artifactId>mysql-connector-j</artifactId>
        <version>8.0.31</version>
    </dependency>
</dependencies>
<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-assembly-plugin</artifactId>
            <version>3.3.0</version>
            <configuration>
                <!-- Specify the main (entry) class of the jar -->
                <archive>
                    <manifest>
                        <mainClass>com.atguigu.datax.Main</mainClass>
                    </manifest>
                </archive>
                <!-- Bundle dependencies into the jar -->
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
            </configuration>
            <executions>
                <!-- Configure the execution -->
                <execution>
                    <id>make-assembly</id>
                    <!-- Bind to the package lifecycle phase -->
                    <phase>package</phase>
                    <goals>
                        <!-- Run only once -->
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>

1.3 Create the com.atguigu.datax.beans.Column Class

package com.atguigu.datax.beans;

import java.util.HashMap;
import java.util.Map;

public class Column {
    private final String name;
    private final String type;
    private final String hiveType;
    private static final Map<String, String> typeMap = new HashMap<>();

    static {
        typeMap.put("bigint", "bigint");
        typeMap.put("int", "bigint");
        typeMap.put("smallint", "bigint");
        typeMap.put("tinyint", "bigint");
        typeMap.put("double", "double");
        typeMap.put("float", "float");
    }

    public Column(String name, String type) {
        this.name = name;
        this.type = type;
        this.hiveType = typeMap.getOrDefault(type, "string");
    }

    public String name() {
        return name;
    }

    public String type() {
        return type;
    }

    public String hiveType() {
        return hiveType;
    }
}
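
The constructor maps a MySQL column type to a Hive type via typeMap and falls back to string for anything unmapped. As a quick illustration, here is a minimal, throwaway sketch; the ColumnDemo class and the column names are hypothetical and not part of the generator:

package com.atguigu.datax.beans;

// Hypothetical demo class; only illustrates the MySQL -> Hive type mapping.
public class ColumnDemo {
    public static void main(String[] args) {
        Column id = new Column("id", "bigint");          // mapped to "bigint"
        Column amount = new Column("amount", "decimal"); // not in typeMap, falls back to "string"
        System.out.println(id.name() + " -> " + id.hiveType());
        System.out.println(amount.name() + " -> " + amount.hiveType());
    }
}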

1.4 Create the com.atguigu.datax.beans.Table Class

package com.atguigu.datax.beans;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class Table {
    private final String tableName;
    private final List<Column> columns;

    public Table(String tableName) {
        this.tableName = tableName;
        this.columns = new ArrayList<>();
    }

    public String name() {
        return tableName;
    }

    public void addColumn(String name, String type) {
        columns.add(new Column(name, type));
    }

    public List<String> getColumnNames() {
        return columns.stream().map(Column::name).collect(Collectors.toList());
    }

    public List<Map<String, String>> getColumnNamesAndTypes() {
        List<Map<String, String>> result = new ArrayList<>();
        columns.forEach(column -> {
            Map<String, String> temp = new HashMap<>();
            temp.put("name", column.name());
            temp.put("type", column.hiveType());
            result.add(temp);
        });
        return result;
    }
}
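
The class exposes two views of its columns: plain names (used later for the mysqlreader column list) and name/Hive-type pairs (used for the hdfswriter column list). A minimal sketch, again with a hypothetical table and columns:

package com.atguigu.datax.beans;

// Hypothetical demo class; shows the two column views a Table provides.
public class TableDemo {
    public static void main(String[] args) {
        Table table = new Table("ads_demo");
        table.addColumn("id", "bigint");
        table.addColumn("dt", "date");

        System.out.println(table.getColumnNames());
        // prints the plain column names: [id, dt]
        System.out.println(table.getColumnNamesAndTypes());
        // prints name/type pairs, with "date" mapped to Hive's "string"
    }
}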

1.5 Create the com.atguigu.datax.configuration.Configuration Class

package com.atguigu.datax.configuration;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Properties;

public class Configuration {
    public static String MYSQL_USER;
    public static String MYSQL_PASSWORD;
    public static String MYSQL_HOST;
    public static String MYSQL_PORT;
    public static String MYSQL_DATABASE_IMPORT;
    public static String MYSQL_DATABASE_EXPORT;
    public static String MYSQL_URL_IMPORT;
    public static String MYSQL_URL_EXPORT;
    public static String MYSQL_TABLES_IMPORT;
    public static String MYSQL_TABLES_EXPORT;
    public static String IS_SEPERATED_TABLES;
    public static String HDFS_URI;
    public static String IMPORT_OUT_DIR;
    public static String EXPORT_OUT_DIR;
    public static String IMPORT_MIGRATION_TYPE = "import";
    public static String EXPORT_MIGRATION_TYPE = "export";

    static {
        Path path = Paths.get("configuration.properties");
        Properties configuration = new Properties();
        try {
            configuration.load(Files.newBufferedReader(path));
            MYSQL_USER = configuration.getProperty("mysql.username", "root");
            MYSQL_PASSWORD = configuration.getProperty("mysql.password", "000000");
            MYSQL_HOST = configuration.getProperty("mysql.host", "hadoop102");
            MYSQL_PORT = configuration.getProperty("mysql.port", "3306");
            MYSQL_DATABASE_IMPORT = configuration.getProperty("mysql.database.import", "ad");
            MYSQL_DATABASE_EXPORT = configuration.getProperty("mysql.database.export", "ad_report");
            MYSQL_URL_IMPORT = "jdbc:mysql://" + MYSQL_HOST + ":" + MYSQL_PORT + "/" + MYSQL_DATABASE_IMPORT + "?useSSL=false&allowPublicKeyRetrieval=true&useUnicode=true&characterEncoding=utf-8";
            MYSQL_URL_EXPORT = "jdbc:mysql://" + MYSQL_HOST + ":" + MYSQL_PORT + "/" + MYSQL_DATABASE_EXPORT + "?useSSL=false&allowPublicKeyRetrieval=true&useUnicode=true&characterEncoding=utf-8";
            MYSQL_TABLES_IMPORT = configuration.getProperty("mysql.tables.import", "");
            MYSQL_TABLES_EXPORT = configuration.getProperty("mysql.tables.export", "");
            IS_SEPERATED_TABLES = configuration.getProperty("is.seperated.tables", "0");
            HDFS_URI = configuration.getProperty("hdfs.uri", "hdfs://hadoop102:8020");
            IMPORT_OUT_DIR = configuration.getProperty("import_out_dir");
            EXPORT_OUT_DIR = configuration.getProperty("export_out_dir");
        } catch (IOException e) {
            MYSQL_USER = "root";
            MYSQL_PASSWORD = "000000";
            MYSQL_HOST = "hadoop102";
            MYSQL_PORT = "3306";
            MYSQL_DATABASE_IMPORT = "ad";
            MYSQL_DATABASE_EXPORT = "ad_report";
            MYSQL_URL_IMPORT = "jdbc:mysql://" + MYSQL_HOST + ":" + MYSQL_PORT + "/" + MYSQL_DATABASE_IMPORT + "?useSSL=false&allowPublicKeyRetrieval=true&useUnicode=true&characterEncoding=utf-8";
            MYSQL_URL_EXPORT = "jdbc:mysql://" + MYSQL_HOST + ":" + MYSQL_PORT + "/" + MYSQL_DATABASE_EXPORT + "?useSSL=false&allowPublicKeyRetrieval=true&useUnicode=true&characterEncoding=utf-8";
            MYSQL_TABLES_IMPORT = "";
            MYSQL_TABLES_EXPORT = "";
            IS_SEPERATED_TABLES = "0";
            HDFS_URI = "hdfs://hadoop102:8020";
            IMPORT_OUT_DIR = null;
            EXPORT_OUT_DIR = null;
        }
    }

    public static void main(String[] args) {
        System.out.println(MYSQL_DATABASE_EXPORT);
    }
}

1.6 Create the com.atguigu.datax.helper.DataxJsonHelper Class

package com.atguigu.datax.helper;

import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.atguigu.datax.beans.Table;
import com.atguigu.datax.configuration.Configuration;

public class DataxJsonHelper {

    // Parse the inputConfig and outputConfig templates

    // Hadoop cluster with a single NameNode (non-HA)
    private final JSONObject inputConfig = JSONUtil.parseObj("{\"job\":{\"content\":[{\"reader\":{\"name\":\"mysqlreader\",\"parameter\":{\"column\":[],\"connection\":[{\"jdbcUrl\":[],\"table\":[]}],\"password\":\"\",\"splitPk\":\"\",\"username\":\"\"}},\"writer\":{\"name\":\"hdfswriter\",\"parameter\":{\"column\":[],\"compress\":\"gzip\",\"defaultFS\":\"\",\"fieldDelimiter\":\"\\t\",\"fileName\":\"content\",\"fileType\":\"text\",\"path\":\"${targetdir}\",\"writeMode\":\"truncate\",\"nullFormat\":\"\"}}}],\"setting\":{\"speed\":{\"channel\":1}}}}");
    private final JSONObject outputConfig = JSONUtil.parseObj("{\"job\":{\"setting\":{\"speed\":{\"channel\":1}},\"content\":[{\"reader\":{\"name\":\"hdfsreader\",\"parameter\":{\"path\":\"${exportdir}\",\"defaultFS\":\"\",\"column\":[\"*\"],\"fileType\":\"text\",\"encoding\":\"UTF-8\",\"fieldDelimiter\":\"\\t\",\"nullFormat\":\"\\\\N\"}},\"writer\":{\"name\":\"mysqlwriter\",\"parameter\":{\"writeMode\":\"replace\",\"username\":\"\",\"password\":\"\",\"column\":[],\"connection\":[{\"jdbcUrl\":\"\",\"table\":[]}]}}}]}}");

    // Hadoop HA cluster
//    private final JSONObject inputConfig = JSONUtil.parseObj("{\"job\": {\"content\": [{\"reader\": {\"name\": \"mysqlreader\",\"parameter\": {\"column\": [],\"connection\": [{\"jdbcUrl\": [],\"table\": []}],\"password\": \"\",\"splitPk\": \"\",\"username\": \"\"}},\"writer\": {\"name\": \"hdfswriter\",\"parameter\": {\"column\": [],\"compress\": \"gzip\",\"defaultFS\": \"hdfs://mycluster\",\"dfs.nameservices\": \"mycluster\",\"dfs.ha.namenodes.mycluster\": \"namenode1,namenode2\",\"dfs.namenode.rpc-address.aliDfs.namenode1\": \"hdfs://hadoop102:8020\",\"dfs.namenode.rpc-address.aliDfs.namenode2\": \"hdfs://hadoop103:8020\",\"dfs.client.failover.proxy.provider.mycluster\": \"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider\",\"fieldDelimiter\": \"\\t\",\"fileName\": \"content\",\"fileType\": \"text\",\"path\": \"${targetdir}\",\"writeMode\": \"truncate\",\"nullFormat\": \"\"}}}],\"setting\": {\"speed\": {\"channel\": 1}}}}");
//    private final JSONObject outputConfig = JSONUtil.parseObj("{\"job\": {\"setting\": {\"speed\": {\"channel\": 1}},\"content\": [{\"reader\": {\"name\": \"hdfsreader\",\"parameter\": {\"path\": \"${exportdir}\",\"defaultFS\": \"\",\"dfs.nameservices\": \"mycluster\",\"dfs.ha.namenodes.mycluster\": \"namenode1,namenode2\",\"dfs.namenode.rpc-address.aliDfs.namenode1\": \"hdfs://hadoop102:8020\",\"dfs.namenode.rpc-address.aliDfs.namenode2\": \"hdfs://hadoop103:8020\",\"dfs.client.failover.proxy.provider.mycluster\": \"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider\",\"column\": [\"*\"],\"fileType\": \"text\",\"encoding\": \"UTF-8\",\"fieldDelimiter\": \"\\t\",\"nullFormat\": \"\\\\N\"}},\"writer\": {\"name\": \"mysqlwriter\",\"parameter\": {\"writeMode\": \"replace\",\"username\": \"\",\"password\": \"\",\"column\": [],\"connection\": [{\"jdbcUrl\": [],\"table\": []}]}}}]}}");

    public DataxJsonHelper() {
        // Get the reader and writer parameter objects
        JSONObject mysqlReaderPara = inputConfig.getByPath("job.content[0].reader.parameter", JSONObject.class);
        JSONObject hdfsWriterPara = inputConfig.getByPath("job.content[0].writer.parameter", JSONObject.class);
        JSONObject hdfsReaderPara = outputConfig.getByPath("job.content[0].reader.parameter", JSONObject.class);
        JSONObject mysqlWriterPara = outputConfig.getByPath("job.content[0].writer.parameter", JSONObject.class);

        // Set defaultFS
        hdfsReaderPara.set("defaultFS", Configuration.HDFS_URI);
        hdfsWriterPara.set("defaultFS", Configuration.HDFS_URI);

        // Set the MySQL username
        mysqlReaderPara.set("username", Configuration.MYSQL_USER);
        mysqlWriterPara.set("username", Configuration.MYSQL_USER);

        // Set the MySQL password
        mysqlReaderPara.set("password", Configuration.MYSQL_PASSWORD);
        mysqlWriterPara.set("password", Configuration.MYSQL_PASSWORD);

        // Set the JDBC URLs
        mysqlReaderPara.putByPath("connection[0].jdbcUrl[0]", Configuration.MYSQL_URL_IMPORT);
        mysqlWriterPara.putByPath("connection[0].jdbcUrl", Configuration.MYSQL_URL_EXPORT);

        // Write the reader and writer parameters back into the templates
        inputConfig.putByPath("job.content[0].reader.parameter", mysqlReaderPara);
        inputConfig.putByPath("job.content[0].writer.parameter", hdfsWriterPara);
        outputConfig.putByPath("job.content[0].reader.parameter", hdfsReaderPara);
        outputConfig.putByPath("job.content[0].writer.parameter", mysqlWriterPara);
    }

    public void setTableAndColumns(Table table, int index, String migrationType) {
        // Set the table name
        setTable(table, index, migrationType);
        // Set the column names and the output file name
        setColumns(table, migrationType);
    }

    public void setColumns(Table table, String migrationType) {
        if (migrationType.equals("import")) {
            // Set the hdfswriter file name
            inputConfig.putByPath("job.content[0].writer.parameter.fileName", table.name());
            // Set the column names
            inputConfig.putByPath("job.content[0].reader.parameter.column", table.getColumnNames());
            inputConfig.putByPath("job.content[0].writer.parameter.column", table.getColumnNamesAndTypes());
        } else {
            // Set the column names
            outputConfig.putByPath("job.content[0].writer.parameter.column", table.getColumnNames());
        }
    }

    public void setTable(Table table, int index, String migrationType) {
        if (migrationType.equals("import")) {
            // Set the table name
            inputConfig.putByPath("job.content[0].reader.parameter.connection[0].table[" + index + "]", table.name());
        } else {
            outputConfig.putByPath("job.content[0].writer.parameter.connection[0].table[" + index + "]", table.name());
        }
    }

    public JSONObject getInputConfig() {
        return inputConfig;
    }

    public JSONObject getOutputConfig() {
        return outputConfig;
    }
}
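
To inspect what the helper produces without touching MySQL or HDFS, the hedged sketch below fills the import template for a single hypothetical table and pretty-prints it with hutool's JSONUtil; the DataxJsonHelperDemo class, table name, and columns are illustrative only:

package com.atguigu.datax.helper;

import cn.hutool.json.JSONUtil;
import com.atguigu.datax.beans.Table;

// Hypothetical demo class; prints the filled-in import template for one table.
public class DataxJsonHelperDemo {
    public static void main(String[] args) {
        Table table = new Table("ads_demo");
        table.addColumn("id", "bigint");
        table.addColumn("dt", "date");

        DataxJsonHelper helper = new DataxJsonHelper();
        helper.setTableAndColumns(table, 0, "import");

        // The hdfswriter path keeps the ${targetdir} placeholder; it is
        // resolved at run time when DataX is invoked (e.g. via its -p option).
        System.out.println(JSONUtil.toJsonPrettyStr(helper.getInputConfig()));
    }
}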

1.7 Create the com.atguigu.datax.helper.MysqlHelper Class

package com.atguigu.datax.helper;

import cn.hutool.db.Db;
import cn.hutool.db.Entity;
import cn.hutool.db.ds.DSFactory;
import cn.hutool.setting.Setting;
import com.atguigu.datax.beans.Table;
import com.atguigu.datax.configuration.Configuration;

import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class MysqlHelper {
    private final List<Table> tables;

    public List<Table> getTables() {
        return tables;
    }

    public MysqlHelper(String url, String database, String mysqlTables) {
        tables = new ArrayList<>();

        Db db = Db.use(DSFactory.create(
                Setting.create()
                        .set("url", url)
                        .set("user", Configuration.MYSQL_USER)
                        .set("pass", Configuration.MYSQL_PASSWORD)
                        .set("showSql", "false")
                        .set("showParams", "false")
                        .set("sqlLevel", "info")
        ).getDataSource());

        // Use the configured tables; if none are configured, query every table in the database
        if (mysqlTables != null && !"".equals(mysqlTables)) {
            for (String mysqlTable : mysqlTables.split(",")) {
                tables.add(new Table(mysqlTable));
            }
        } else {
            try {
                db.findAll(Entity.create("information_schema.TABLES")
                                .set("TABLE_SCHEMA", database))
                        .forEach(entity ->
                                tables.add(new Table(entity.getStr("TABLE_NAME"))));
            } catch (SQLException e) {
                throw new RuntimeException(e);
            }
        }

        // Fetch the columns of every table
        for (Table table : tables) {
            try {
                db.findAll(Entity.create("information_schema.COLUMNS")
                                .set("TABLE_SCHEMA", database)
                                .set("TABLE_NAME", table.name())
                        ).stream()
                        .sorted(Comparator.comparingInt(o -> o.getInt("ORDINAL_POSITION")))
                        .forEach(entity -> table.addColumn(
                                entity.getStr("COLUMN_NAME"),
                                entity.getStr("DATA_TYPE")
                        ));
            } catch (SQLException e) {
                throw new RuntimeException(e);
            }
        }
    }
}
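
A hedged usage sketch: it assumes a reachable MySQL instance matching the credentials in Configuration, and simply prints the table and column metadata read from information_schema (the MysqlHelperDemo class is illustrative, not part of the project):

package com.atguigu.datax.helper;

import com.atguigu.datax.beans.Table;
import com.atguigu.datax.configuration.Configuration;

// Hypothetical demo class; requires a reachable MySQL instance.
public class MysqlHelperDemo {
    public static void main(String[] args) {
        MysqlHelper mysqlHelper = new MysqlHelper(
                Configuration.MYSQL_URL_IMPORT,
                Configuration.MYSQL_DATABASE_IMPORT,
                "");  // empty string: read every table of the database from information_schema

        for (Table table : mysqlHelper.getTables()) {
            System.out.println(table.name() + " -> " + table.getColumnNames());
        }
    }
}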

1.8 Create the com.atguigu.datax.Main Class

package com.atguigu.datax;

import cn.hutool.json.JSONUtil;
import com.atguigu.datax.beans.Table;
import com.atguigu.datax.configuration.Configuration;
import com.atguigu.datax.helper.DataxJsonHelper;
import com.atguigu.datax.helper.MysqlHelper;

import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

public class Main {
    public static void main(String[] args) throws IOException {

        // Generate import (MySQL -> HDFS) configuration files
        if (Configuration.IMPORT_OUT_DIR != null &&
                !Configuration.IMPORT_OUT_DIR.equals("")) {
            MysqlHelper mysqlHelper = new MysqlHelper(
                    Configuration.MYSQL_URL_IMPORT,
                    Configuration.MYSQL_DATABASE_IMPORT,
                    Configuration.MYSQL_TABLES_IMPORT);
            DataxJsonHelper dataxJsonHelper = new DataxJsonHelper();

            // Get the migration type
            String migrationType = Configuration.IMPORT_MIGRATION_TYPE;

            // Create the parent directory
            Files.createDirectories(Paths.get(Configuration.IMPORT_OUT_DIR));
            List<Table> tables = mysqlHelper.getTables();

            // Check whether the configured tables are shards of one logical table and handle them accordingly
            if (Configuration.IS_SEPERATED_TABLES.equals("1")) {
                for (int i = 0; i < tables.size(); i++) {
                    Table table = tables.get(i);
                    dataxJsonHelper.setTable(table, i, migrationType);
                }
                dataxJsonHelper.setColumns(tables.get(0), migrationType);

                // Write the final JSON configuration
                FileWriter inputWriter = new FileWriter(Configuration.IMPORT_OUT_DIR + "/" + Configuration.MYSQL_DATABASE_IMPORT + "." + tables.get(0).name() + ".json");
                JSONUtil.toJsonStr(dataxJsonHelper.getInputConfig(), inputWriter);
                inputWriter.close();
            } else {
                for (Table table : tables) {
                    // Set the table information
                    dataxJsonHelper.setTableAndColumns(table, 0, migrationType);

                    // Write the final JSON configuration
                    FileWriter inputWriter = new FileWriter(Configuration.IMPORT_OUT_DIR + "/" + Configuration.MYSQL_DATABASE_IMPORT + "." + table.name() + ".json");
                    JSONUtil.toJsonStr(dataxJsonHelper.getInputConfig(), inputWriter);
                    inputWriter.close();
                }
            }
        }

        // Generate export (HDFS -> MySQL) configuration files
        if (Configuration.EXPORT_OUT_DIR != null &&
                !"".equals(Configuration.EXPORT_OUT_DIR)) {
            MysqlHelper mysqlHelper = new MysqlHelper(
                    Configuration.MYSQL_URL_EXPORT,
                    Configuration.MYSQL_DATABASE_EXPORT,
                    Configuration.MYSQL_TABLES_EXPORT);
            DataxJsonHelper dataxJsonHelper = new DataxJsonHelper();

            // Get the migration type
            String migrationType = Configuration.EXPORT_MIGRATION_TYPE;

            // Create the parent directory
            Files.createDirectories(Paths.get(Configuration.EXPORT_OUT_DIR));
            List<Table> tables = mysqlHelper.getTables();

            if (Configuration.IS_SEPERATED_TABLES.equals("1")) {
                for (int i = 0; i < tables.size(); i++) {
                    Table table = tables.get(i);
                    dataxJsonHelper.setTable(table, i, migrationType);
                }
                dataxJsonHelper.setColumns(tables.get(0), migrationType);

                // Write the final JSON configuration
                FileWriter outputWriter = new FileWriter(Configuration.EXPORT_OUT_DIR + "/" + Configuration.MYSQL_DATABASE_EXPORT + "." + tables.get(0).name() + ".json");
                JSONUtil.toJsonStr(dataxJsonHelper.getOutputConfig(), outputWriter);
                outputWriter.close();
            } else {
                for (Table table : tables) {
                    // Set the table information
                    dataxJsonHelper.setTableAndColumns(table, 0, migrationType);
                    // Write the final JSON configuration
                    FileWriter outputWriter = new FileWriter(Configuration.EXPORT_OUT_DIR + "/" + Configuration.MYSQL_DATABASE_EXPORT + "." + table.name() + ".json");
                    JSONUtil.toJsonStr(dataxJsonHelper.getOutputConfig(), outputWriter);
                    outputWriter.close();
                }
            }
        }
    }
}

2. How to Use

2.1 Edit the Configuration File

Create a file named configuration.properties in the project root directory. The supported keys are listed below.

key                      default                  description
mysql.username           root                     MySQL username
mysql.password           000000                   MySQL password
mysql.host               hadoop102                Host where MySQL runs
mysql.port               3306                     MySQL port
mysql.database.import    ad                       Database to import into HDFS
mysql.database.export    ad_report                Database to export from HDFS
mysql.tables.import      ""                       Tables to import; an empty string means all tables
mysql.tables.export      ""                       Tables to export; an empty string means all tables
is.seperated.tables      0                        Whether the tables are shards of one logical table; 0 means no
hdfs.uri                 hdfs://hadoop102:8020    HDFS NameNode address
import_out_dir           null                     Output directory for the import (to HDFS) configuration files
export_out_dir           null                     Output directory for the export (from HDFS) configuration files

The file contents are as follows:

mysql.username=root
mysql.password=000000
mysql.host=hadoop102
mysql.port=3306
mysql.database.import=ad
mysql.database.export=ad_report
mysql.tables.import=
mysql.tables.export=
is.seperated.tables=0
hdfs.uri=hdfs://hadoop102:8020
import_out_dir=d:/output/import
export_out_dir=d:/output/export

2.2 Run

Run mvn clean package to build the project; the target directory will then contain datax-config-generator-1.0-SNAPSHOT-jar-with-dependencies.jar. Copy it to the Linux host together with configuration.properties and run:

[atguigu@hadoop102 ~]$ java -jar datax-config-generator-1.0-SNAPSHOT-jar-with-dependencies.jar
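
The generated JSON files keep the ${targetdir} and ${exportdir} placeholders, so they are meant to be passed to DataX together with a -p"-D..." parameter at run time. A rough example of running one generated import configuration is shown below; the DataX install path, HDFS target directory, and JSON file name are assumptions for illustration only (hdfswriter expects the target path to already exist, hence the mkdir):

[atguigu@hadoop102 ~]$ hadoop fs -mkdir -p /origin_data/ad/db/ads_demo_full/2023-01-07
[atguigu@hadoop102 ~]$ python /opt/module/datax/bin/datax.py -p"-Dtargetdir=/origin_data/ad/db/ads_demo_full/2023-01-07" /opt/module/datax/job/import/ad.ads_demo.json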
