1. Code
1.1 Create a new Maven project
1.2 Add the following dependencies to pom.xml
<properties>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<dependencies>
    <dependency>
        <groupId>cn.hutool</groupId>
        <artifactId>hutool-json</artifactId>
        <version>5.8.11</version>
    </dependency>
    <dependency>
        <groupId>cn.hutool</groupId>
        <artifactId>hutool-db</artifactId>
        <version>5.8.11</version>
    </dependency>
    <dependency>
        <groupId>com.mysql</groupId>
        <artifactId>mysql-connector-j</artifactId>
        <version>8.0.31</version>
    </dependency>
</dependencies>

<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-assembly-plugin</artifactId>
            <version>3.3.0</version>
            <configuration>
                <!-- Specify the entry (main) class of the jar -->
                <archive>
                    <manifest>
                        <mainClass>com.atguigu.datax.Main</mainClass>
                    </manifest>
                </archive>
                <!-- Package all dependencies into the jar -->
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
            </configuration>
            <executions>
                <!-- Configure the execution -->
                <execution>
                    <id>make-assembly</id>
                    <!-- Bind to the package phase -->
                    <phase>package</phase>
                    <goals>
                        <!-- Run the single goal once -->
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
1.3 Create the com.atguigu.datax.beans.Column class
package com.atguigu.datax.beans;

import java.util.HashMap;
import java.util.Map;

public class Column {
    private final String name;
    private final String type;
    private final String hiveType;
    private static final Map<String, String> typeMap = new HashMap<>();

    static {
        typeMap.put("bigint", "bigint");
        typeMap.put("int", "bigint");
        typeMap.put("smallint", "bigint");
        typeMap.put("tinyint", "bigint");
        typeMap.put("double", "double");
        typeMap.put("float", "float");
    }

    public Column(String name, String type) {
        this.name = name;
        this.type = type;
        this.hiveType = typeMap.getOrDefault(type, "string");
    }

    public String name() {
        return name;
    }

    public String type() {
        return type;
    }

    public String hiveType() {
        return hiveType;
    }
}
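The typeMap only covers MySQL's integer and floating-point types; any other MySQL type (varchar, datetime, decimal, and so on) falls back to string on the Hive side. A minimal, hypothetical check of that behavior (illustration only, not part of the project):

package com.atguigu.datax.beans;

public class ColumnTypeDemo {
    public static void main(String[] args) {
        // "int" is in typeMap and maps to the Hive type "bigint"
        System.out.println(new Column("id", "int").hiveType());         // bigint
        // "decimal" is not in typeMap, so it falls back to "string"
        System.out.println(new Column("amount", "decimal").hiveType()); // string
    }
}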
1.4 Create the com.atguigu.datax.beans.Table class
package com.atguigu.datax.beans;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class Table {
    private final String tableName;
    private final List<Column> columns;

    public Table(String tableName) {
        this.tableName = tableName;
        this.columns = new ArrayList<>();
    }

    public String name() {
        return tableName;
    }

    public void addColumn(String name, String type) {
        columns.add(new Column(name, type));
    }

    public List<String> getColumnNames() {
        return columns.stream().map(Column::name).collect(Collectors.toList());
    }

    public List<Map<String, String>> getColumnNamesAndTypes() {
        List<Map<String, String>> result = new ArrayList<>();
        columns.forEach(column -> {
            Map<String, String> temp = new HashMap<>();
            temp.put("name", column.name());
            temp.put("type", column.hiveType());
            result.add(temp);
        });
        return result;
    }
}
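Table is a thin aggregate of Column objects: getColumnNames() later feeds the mysqlreader column list, while getColumnNamesAndTypes() produces the name/type maps that hdfswriter expects (see DataxJsonHelper below). A short hypothetical usage sketch (illustration only, not part of the project):

package com.atguigu.datax.beans;

public class TableDemo {
    public static void main(String[] args) {
        // Hypothetical table and columns, for illustration only
        Table table = new Table("ads");
        table.addColumn("id", "bigint");
        table.addColumn("ad_name", "varchar");
        System.out.println(table.getColumnNames());         // [id, ad_name]
        System.out.println(table.getColumnNamesAndTypes()); // e.g. [{name=id, type=bigint}, {name=ad_name, type=string}] (map order may vary)
    }
}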
1.5 Create the com.atguigu.datax.configuration.Configuration class
package com.atguigu.datax.configuration;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Properties;

public class Configuration {
    public static String MYSQL_USER;
    public static String MYSQL_PASSWORD;
    public static String MYSQL_HOST;
    public static String MYSQL_PORT;
    public static String MYSQL_DATABASE_IMPORT;
    public static String MYSQL_DATABASE_EXPORT;
    public static String MYSQL_URL_IMPORT;
    public static String MYSQL_URL_EXPORT;
    public static String MYSQL_TABLES_IMPORT;
    public static String MYSQL_TABLES_EXPORT;
    public static String IS_SEPERATED_TABLES;
    public static String HDFS_URI;
    public static String IMPORT_OUT_DIR;
    public static String EXPORT_OUT_DIR;
    public static String IMPORT_MIGRATION_TYPE = "import";
    public static String EXPORT_MIGRATION_TYPE = "export";

    static {
        Path path = Paths.get("configuration.properties");
        Properties configuration = new Properties();
        try {
            configuration.load(Files.newBufferedReader(path));
            MYSQL_USER = configuration.getProperty("mysql.username", "root");
            MYSQL_PASSWORD = configuration.getProperty("mysql.password", "000000");
            MYSQL_HOST = configuration.getProperty("mysql.host", "hadoop102");
            MYSQL_PORT = configuration.getProperty("mysql.port", "3306");
            MYSQL_DATABASE_IMPORT = configuration.getProperty("mysql.database.import", "ad");
            MYSQL_DATABASE_EXPORT = configuration.getProperty("mysql.database.export", "ad_report");
            MYSQL_URL_IMPORT = "jdbc:mysql://" + MYSQL_HOST + ":" + MYSQL_PORT + "/" + MYSQL_DATABASE_IMPORT + "?useSSL=false&allowPublicKeyRetrieval=true&useUnicode=true&characterEncoding=utf-8";
            MYSQL_URL_EXPORT = "jdbc:mysql://" + MYSQL_HOST + ":" + MYSQL_PORT + "/" + MYSQL_DATABASE_EXPORT + "?useSSL=false&allowPublicKeyRetrieval=true&useUnicode=true&characterEncoding=utf-8";
            MYSQL_TABLES_IMPORT = configuration.getProperty("mysql.tables.import", "");
            MYSQL_TABLES_EXPORT = configuration.getProperty("mysql.tables.export", "");
            IS_SEPERATED_TABLES = configuration.getProperty("is.seperated.tables", "0");
            HDFS_URI = configuration.getProperty("hdfs.uri", "hdfs://hadoop102:8020");
            IMPORT_OUT_DIR = configuration.getProperty("import_out_dir");
            EXPORT_OUT_DIR = configuration.getProperty("export_out_dir");
        } catch (IOException e) {
            MYSQL_USER = "root";
            MYSQL_PASSWORD = "000000";
            MYSQL_HOST = "hadoop102";
            MYSQL_PORT = "3306";
            MYSQL_DATABASE_IMPORT = "ad";
            MYSQL_DATABASE_EXPORT = "ad_report";
            MYSQL_URL_IMPORT = "jdbc:mysql://" + MYSQL_HOST + ":" + MYSQL_PORT + "/" + MYSQL_DATABASE_IMPORT + "?useSSL=false&allowPublicKeyRetrieval=true&useUnicode=true&characterEncoding=utf-8";
            MYSQL_URL_EXPORT = "jdbc:mysql://" + MYSQL_HOST + ":" + MYSQL_PORT + "/" + MYSQL_DATABASE_EXPORT + "?useSSL=false&allowPublicKeyRetrieval=true&useUnicode=true&characterEncoding=utf-8";
            MYSQL_TABLES_IMPORT = "";
            MYSQL_TABLES_EXPORT = "";
            IS_SEPERATED_TABLES = "0";
            HDFS_URI = "hdfs://hadoop102:8020";
            IMPORT_OUT_DIR = null;
            EXPORT_OUT_DIR = null;
        }
    }

    public static void main(String[] args) {
        System.out.println(MYSQL_DATABASE_EXPORT);
    }
}
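Note that configuration.properties is loaded with a relative path (Paths.get("configuration.properties")), so it is resolved against the working directory from which the program is launched: the project root when running in the IDE, or the directory you run java -jar from. If the file cannot be read, the hard-coded defaults above are used and IMPORT_OUT_DIR / EXPORT_OUT_DIR remain null, in which case the Main class below generates no config files.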
1.6 Create the com.atguigu.datax.helper.DataxJsonHelper class
package com.atguigu.datax.helper;

import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.atguigu.datax.beans.Table;
import com.atguigu.datax.configuration.Configuration;

public class DataxJsonHelper {
    // Parse the inputConfig (MySQL -> HDFS) and outputConfig (HDFS -> MySQL) templates
    // Single-NameNode Hadoop cluster
    private final JSONObject inputConfig = JSONUtil.parseObj("{\"job\":{\"content\":[{\"reader\":{\"name\":\"mysqlreader\",\"parameter\":{\"column\":[],\"connection\":[{\"jdbcUrl\":[],\"table\":[]}],\"password\":\"\",\"splitPk\":\"\",\"username\":\"\"}},\"writer\":{\"name\":\"hdfswriter\",\"parameter\":{\"column\":[],\"compress\":\"gzip\",\"defaultFS\":\"\",\"fieldDelimiter\":\"\\t\",\"fileName\":\"content\",\"fileType\":\"text\",\"path\":\"${targetdir}\",\"writeMode\":\"truncate\",\"nullFormat\":\"\"}}}],\"setting\":{\"speed\":{\"channel\":1}}}}");
    private final JSONObject outputConfig = JSONUtil.parseObj("{\"job\":{\"setting\":{\"speed\":{\"channel\":1}},\"content\":[{\"reader\":{\"name\":\"hdfsreader\",\"parameter\":{\"path\":\"${exportdir}\",\"defaultFS\":\"\",\"column\":[\"*\"],\"fileType\":\"text\",\"encoding\":\"UTF-8\",\"fieldDelimiter\":\"\\t\",\"nullFormat\":\"\\\\N\"}},\"writer\":{\"name\":\"mysqlwriter\",\"parameter\":{\"writeMode\":\"replace\",\"username\":\"\",\"password\":\"\",\"column\":[],\"connection\":[{\"jdbcUrl\":\"\",\"table\":[]}]}}}]}}");
    // Hadoop HA cluster (use these templates instead when the cluster runs NameNode HA with nameservice "mycluster")
    // private final JSONObject inputConfig = JSONUtil.parseObj("{\"job\": {\"content\": [{\"reader\": {\"name\": \"mysqlreader\",\"parameter\": {\"column\": [],\"connection\": [{\"jdbcUrl\": [],\"table\": []}],\"password\": \"\",\"splitPk\": \"\",\"username\": \"\"}},\"writer\": {\"name\": \"hdfswriter\",\"parameter\": {\"column\": [],\"compress\": \"gzip\",\"defaultFS\": \"hdfs://mycluster\",\"dfs.nameservices\": \"mycluster\",\"dfs.ha.namenodes.mycluster\": \"namenode1,namenode2\",\"dfs.namenode.rpc-address.mycluster.namenode1\": \"hdfs://hadoop102:8020\",\"dfs.namenode.rpc-address.mycluster.namenode2\": \"hdfs://hadoop103:8020\",\"dfs.client.failover.proxy.provider.mycluster\": \"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider\",\"fieldDelimiter\": \"\\t\",\"fileName\": \"content\",\"fileType\": \"text\",\"path\": \"${targetdir}\",\"writeMode\": \"truncate\",\"nullFormat\": \"\"}}}],\"setting\": {\"speed\": {\"channel\": 1}}}}");
    // private final JSONObject outputConfig = JSONUtil.parseObj("{\"job\": {\"setting\": {\"speed\": {\"channel\": 1}},\"content\": [{\"reader\": {\"name\": \"hdfsreader\",\"parameter\": {\"path\": \"${exportdir}\",\"defaultFS\": \"\",\"dfs.nameservices\": \"mycluster\",\"dfs.ha.namenodes.mycluster\": \"namenode1,namenode2\",\"dfs.namenode.rpc-address.mycluster.namenode1\": \"hdfs://hadoop102:8020\",\"dfs.namenode.rpc-address.mycluster.namenode2\": \"hdfs://hadoop103:8020\",\"dfs.client.failover.proxy.provider.mycluster\": \"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider\",\"column\": [\"*\"],\"fileType\": \"text\",\"encoding\": \"UTF-8\",\"fieldDelimiter\": \"\\t\",\"nullFormat\": \"\\\\N\"}},\"writer\": {\"name\": \"mysqlwriter\",\"parameter\": {\"writeMode\": \"replace\",\"username\": \"\",\"password\": \"\",\"column\": [],\"connection\": [{\"jdbcUrl\": [],\"table\": []}]}}}]}}");
    public DataxJsonHelper() {
        // Get the reader and writer parameter objects
        JSONObject mysqlReaderPara = inputConfig.getByPath("job.content[0].reader.parameter", JSONObject.class);
        JSONObject hdfsWriterPara = inputConfig.getByPath("job.content[0].writer.parameter", JSONObject.class);
        JSONObject hdfsReaderPara = outputConfig.getByPath("job.content[0].reader.parameter", JSONObject.class);
        JSONObject mysqlWriterPara = outputConfig.getByPath("job.content[0].writer.parameter", JSONObject.class);
        // Set defaultFS
        hdfsReaderPara.set("defaultFS", Configuration.HDFS_URI);
        hdfsWriterPara.set("defaultFS", Configuration.HDFS_URI);
        // Set the MySQL username
        mysqlReaderPara.set("username", Configuration.MYSQL_USER);
        mysqlWriterPara.set("username", Configuration.MYSQL_USER);
        // Set the MySQL password
        mysqlReaderPara.set("password", Configuration.MYSQL_PASSWORD);
        mysqlWriterPara.set("password", Configuration.MYSQL_PASSWORD);
        // Set the JDBC URLs
        mysqlReaderPara.putByPath("connection[0].jdbcUrl[0]", Configuration.MYSQL_URL_IMPORT);
        mysqlWriterPara.putByPath("connection[0].jdbcUrl", Configuration.MYSQL_URL_EXPORT);
        // Write the reader and writer parameters back
        inputConfig.putByPath("job.content[0].reader.parameter", mysqlReaderPara);
        inputConfig.putByPath("job.content[0].writer.parameter", hdfsWriterPara);
        outputConfig.putByPath("job.content[0].reader.parameter", hdfsReaderPara);
        outputConfig.putByPath("job.content[0].writer.parameter", mysqlWriterPara);
    }

    public void setTableAndColumns(Table table, int index, String migrationType) {
        // Set the table name
        setTable(table, index, migrationType);
        // Set the columns (and, for imports, the output file name)
        setColumns(table, migrationType);
    }

    public void setColumns(Table table, String migrationType) {
        if (migrationType.equals("import")) {
            // Set the hdfswriter file name
            inputConfig.putByPath("job.content[0].writer.parameter.fileName", table.name());
            // Set the columns
            inputConfig.putByPath("job.content[0].reader.parameter.column", table.getColumnNames());
            inputConfig.putByPath("job.content[0].writer.parameter.column", table.getColumnNamesAndTypes());
        } else {
            // Set the columns
            outputConfig.putByPath("job.content[0].writer.parameter.column", table.getColumnNames());
        }
    }

    public void setTable(Table table, int index, String migrationType) {
        if (migrationType.equals("import")) {
            // Set the table name
            inputConfig.putByPath("job.content[0].reader.parameter.connection[0].table[" + index + "]", table.name());
        } else {
            outputConfig.putByPath("job.content[0].writer.parameter.connection[0].table[" + index + "]", table.name());
        }
    }

    public JSONObject getInputConfig() {
        return inputConfig;
    }

    public JSONObject getOutputConfig() {
        return outputConfig;
    }
}
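To make the flow concrete, here is a hypothetical, standalone sketch (not part of the project) that builds an import config for a single hand-made table and prints it; it assumes configuration.properties can be found in the working directory, as described above:

package com.atguigu.datax;

import cn.hutool.json.JSONUtil;
import com.atguigu.datax.beans.Table;
import com.atguigu.datax.configuration.Configuration;
import com.atguigu.datax.helper.DataxJsonHelper;

public class DataxJsonHelperDemo {
    public static void main(String[] args) {
        // Hypothetical table and columns, for illustration only
        Table table = new Table("ads");
        table.addColumn("id", "bigint");
        table.addColumn("ad_name", "varchar");
        DataxJsonHelper dataxJsonHelper = new DataxJsonHelper();
        // Index 0 and migration type "import": fills the mysqlreader table/column lists
        // and the hdfswriter fileName/column settings
        dataxJsonHelper.setTableAndColumns(table, 0, Configuration.IMPORT_MIGRATION_TYPE);
        System.out.println(JSONUtil.toJsonStr(dataxJsonHelper.getInputConfig()));
    }
}

Note that the generated configs deliberately keep ${targetdir} (import) and ${exportdir} (export) as placeholders; they are meant to be substituted when the DataX jobs are actually launched.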
1.7 Create the com.atguigu.datax.helper.MysqlHelper class
package com.atguigu.datax.helper;

import cn.hutool.db.Db;
import cn.hutool.db.Entity;
import cn.hutool.db.ds.DSFactory;
import cn.hutool.setting.Setting;
import com.atguigu.datax.beans.Table;
import com.atguigu.datax.configuration.Configuration;

import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class MysqlHelper {
    private final List<Table> tables;

    public List<Table> getTables() {
        return tables;
    }

    public MysqlHelper(String url, String database, String mysqlTables) {
        tables = new ArrayList<>();
        Db db = Db.use(DSFactory.create(
                Setting.create()
                        .set("url", url)
                        .set("user", Configuration.MYSQL_USER)
                        .set("pass", Configuration.MYSQL_PASSWORD)
                        .set("showSql", "false")
                        .set("showParams", "false")
                        .set("sqlLevel", "info")
        ).getDataSource());
        // Use the configured tables; if none are configured, query all tables in the database
        if (mysqlTables != null && !"".equals(mysqlTables)) {
            for (String mysqlTable : mysqlTables.split(",")) {
                tables.add(new Table(mysqlTable));
            }
        } else {
            try {
                db.findAll(Entity.create("information_schema.TABLES")
                                .set("TABLE_SCHEMA", database))
                        .forEach(entity ->
                                tables.add(new Table(entity.getStr("TABLE_NAME"))));
            } catch (SQLException e) {
                throw new RuntimeException(e);
            }
        }
        // Fetch the columns of every table, ordered by their position in the table
        for (Table table : tables) {
            try {
                db.findAll(Entity.create("information_schema.COLUMNS")
                                .set("TABLE_SCHEMA", database)
                                .set("TABLE_NAME", table.name())
                        ).stream()
                        .sorted(Comparator.comparingInt(o -> o.getInt("ORDINAL_POSITION")))
                        .forEach(entity -> table.addColumn(
                                entity.getStr("COLUMN_NAME"),
                                entity.getStr("DATA_TYPE")
                        ));
            } catch (SQLException e) {
                throw new RuntimeException(e);
            }
        }
    }
}
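A hypothetical standalone sketch (not part of the project) of how MysqlHelper is used: passing an empty table list makes it enumerate every table of the import database from information_schema, together with its columns. It assumes the MySQL instance from the configuration is reachable:

package com.atguigu.datax;

import com.atguigu.datax.beans.Table;
import com.atguigu.datax.configuration.Configuration;
import com.atguigu.datax.helper.MysqlHelper;

public class MysqlHelperDemo {
    public static void main(String[] args) {
        // Empty third argument: read every table in the import database
        MysqlHelper mysqlHelper = new MysqlHelper(
                Configuration.MYSQL_URL_IMPORT,
                Configuration.MYSQL_DATABASE_IMPORT,
                "");
        for (Table table : mysqlHelper.getTables()) {
            System.out.println(table.name() + " -> " + table.getColumnNames());
        }
    }
}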
1.8 Create the com.atguigu.datax.Main class
package com.atguigu.datax;

import cn.hutool.json.JSONUtil;
import com.atguigu.datax.beans.Table;
import com.atguigu.datax.configuration.Configuration;
import com.atguigu.datax.helper.DataxJsonHelper;
import com.atguigu.datax.helper.MysqlHelper;

import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

public class Main {
    public static void main(String[] args) throws IOException {
        // Generate the MySQL -> HDFS (import) config files
        if (Configuration.IMPORT_OUT_DIR != null &&
                !Configuration.IMPORT_OUT_DIR.equals("")) {
            MysqlHelper mysqlHelper = new MysqlHelper(
                    Configuration.MYSQL_URL_IMPORT,
                    Configuration.MYSQL_DATABASE_IMPORT,
                    Configuration.MYSQL_TABLES_IMPORT);
            DataxJsonHelper dataxJsonHelper = new DataxJsonHelper();
            // Get the migration type
            String migrationType = Configuration.IMPORT_MIGRATION_TYPE;
            // Create the parent directory
            Files.createDirectories(Paths.get(Configuration.IMPORT_OUT_DIR));
            List<Table> tables = mysqlHelper.getTables();
            // Check whether the configured tables are shards of one logical table and handle the two cases differently
            if (Configuration.IS_SEPERATED_TABLES.equals("1")) {
                for (int i = 0; i < tables.size(); i++) {
                    Table table = tables.get(i);
                    dataxJsonHelper.setTable(table, i, migrationType);
                }
                dataxJsonHelper.setColumns(tables.get(0), migrationType);
                // Write out the final JSON config
                FileWriter inputWriter = new FileWriter(Configuration.IMPORT_OUT_DIR + "/" + Configuration.MYSQL_DATABASE_IMPORT + "." + tables.get(0).name() + ".json");
                JSONUtil.toJsonStr(dataxJsonHelper.getInputConfig(), inputWriter);
                inputWriter.close();
            } else {
                for (Table table : tables) {
                    // Set the table info
                    dataxJsonHelper.setTableAndColumns(table, 0, migrationType);
                    // Write out the final JSON config
                    FileWriter inputWriter = new FileWriter(Configuration.IMPORT_OUT_DIR + "/" + Configuration.MYSQL_DATABASE_IMPORT + "." + table.name() + ".json");
                    JSONUtil.toJsonStr(dataxJsonHelper.getInputConfig(), inputWriter);
                    inputWriter.close();
                }
            }
        }
        // Generate the HDFS -> MySQL (export) config files
        if (Configuration.EXPORT_OUT_DIR != null &&
                !"".equals(Configuration.EXPORT_OUT_DIR)) {
            MysqlHelper mysqlHelper = new MysqlHelper(
                    Configuration.MYSQL_URL_EXPORT,
                    Configuration.MYSQL_DATABASE_EXPORT,
                    Configuration.MYSQL_TABLES_EXPORT);
            DataxJsonHelper dataxJsonHelper = new DataxJsonHelper();
            // Get the migration type
            String migrationType = Configuration.EXPORT_MIGRATION_TYPE;
            // Create the parent directory
            Files.createDirectories(Paths.get(Configuration.EXPORT_OUT_DIR));
            List<Table> tables = mysqlHelper.getTables();
            if (Configuration.IS_SEPERATED_TABLES.equals("1")) {
                for (int i = 0; i < tables.size(); i++) {
                    Table table = tables.get(i);
                    dataxJsonHelper.setTable(table, i, migrationType);
                }
                dataxJsonHelper.setColumns(tables.get(0), migrationType);
                // Write out the final JSON config
                FileWriter outputWriter = new FileWriter(Configuration.EXPORT_OUT_DIR + "/" + Configuration.MYSQL_DATABASE_EXPORT + "." + tables.get(0).name() + ".json");
                JSONUtil.toJsonStr(dataxJsonHelper.getOutputConfig(), outputWriter);
                outputWriter.close();
            } else {
                for (Table table : tables) {
                    // Set the table info
                    dataxJsonHelper.setTableAndColumns(table, 0, migrationType);
                    // Write out the final JSON config
                    FileWriter outputWriter = new FileWriter(Configuration.EXPORT_OUT_DIR + "/" + Configuration.MYSQL_DATABASE_EXPORT + "." + table.name() + ".json");
                    JSONUtil.toJsonStr(dataxJsonHelper.getOutputConfig(), outputWriter);
                    outputWriter.close();
                }
            }
        }
    }
}
2. How to Use
2.1 Edit the configuration file
Create a new configuration file named configuration.properties in the project root directory:
| key | default | description |
| --- | --- | --- |
| mysql.username | root | MySQL user name |
| mysql.password | 000000 | MySQL password |
| mysql.host | hadoop102 | host running MySQL |
| mysql.port | 3306 | MySQL port |
| mysql.database.import | ad | database to be imported into HDFS |
| mysql.database.export | ad_report | database to be exported from HDFS |
| mysql.tables.import | "" | tables to import; an empty string means all tables |
| mysql.tables.export | "" | tables to export; an empty string means all tables |
| is.seperated.tables | 0 | whether the tables are shards of one logical table; 0 means no |
| hdfs.uri | hdfs://hadoop102:8020 | HDFS NameNode address |
| import_out_dir | null | output directory for the import (to HDFS) config files |
| export_out_dir | null | output directory for the export (from HDFS) config files |
The file contents are as follows.
mysql.username=root
mysql.password=000000
mysql.host=hadoop102
mysql.port=3306
mysql.database.import=ad
mysql.database.export=ad_report
mysql.tables.import=
mysql.tables.export=
is.seperated.tables=0
hdfs.uri=hdfs://hadoop102:8020
import_out_dir=d:/output/import
export_out_dir=d:/output/export
2.2 Run
Run mvn clean package to build the project. The target directory will contain datax-config-generator-1.0-SNAPSHOT-jar-with-dependencies.jar; copy it together with configuration.properties to the Linux host and run:
[atguigu@hadoop102 ~]$ java -jar datax-config-generator-1.0-SNAPSHOT-jar-with-dependencies.jar
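On success, one JSON file per table (or one per logical table when is.seperated.tables=1) is written to the directories given by import_out_dir and export_out_dir, named <database>.<table>.json as in the FileWriter paths in Main. The import configs still contain the ${targetdir} placeholder and the export configs the ${exportdir} placeholder; supply those paths when submitting the corresponding DataX jobs.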