1 背景
由于数据同步和数据汇聚中,常常需要从多个数据源中获取数据,这些数据源可能来自不同的厂商、不同的部门、甚至不同的国家或地区。这些数据源之间的数据格式、数据结构、甚至数据语言都可能存在差异,因此需要将它们进行结构转换,以便能够在一个平台上对这些数据进行管理、分析和使用。
2 技术实现
2.1 自写工具类实现
1 公用类
import lombok.Data;
@Data
public class DdlChangeInfo {
/**
* 表名
*/
private String tableName;
/**
* 来源数据库DDL
*/
private String sourceDdl;
/**
* 目标数据库DDL
*/
private String targetDdl;
/**
* 来源数据库类型
*/
private String sourceSqlType;
/**
* 目标数据库类型
*/
private String targetSqlType;
}
import java.util.List;
public interface IDdlChangeStartegy {
void change(List<DdlChangeInfo> ddlChangeInfoList);
}
2 转换类
MySQL->PostgreSQL
import lombok.extern.slf4j.Slf4j;
import net.sf.jsqlparser.parser.CCJSqlParserUtil;
import net.sf.jsqlparser.schema.Table;
import net.sf.jsqlparser.statement.Statements;
import net.sf.jsqlparser.statement.create.table.ColumnDefinition;
import net.sf.jsqlparser.statement.create.table.CreateTable;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
/**
* Mysql转Postgresql DDL策略
*/
@Slf4j
public class Mysql2PosgresqlDdlChangeStartgy implements IDdlChangeStartegy {
@Override
public void change(List<DdlChangeInfo> ddlChangeInfoList) {
for (DdlChangeInfo ddlChangeInfo : ddlChangeInfoList) {
String ddl = ddlChangeInfo.getSourceDdl().replaceAll("current_timestamp\\(\\) ON UPDATE current_timestamp\\(\\)", "current_timestamp")
.replaceAll("current_timestamp\\(\\)", "current_timestamp")
.replaceAll("current_timestamp\\(\\)", "current_timestamp")
.replaceAll("int\\([0-9]*\\)", "INT")
.replaceAll("tinyint\\([0-9]*\\)", "INT")
.replaceAll(" tinyINT ", "INT")
.replaceAll("double\\([0-9]*,[0-9]*\\)", "float8")
.replaceAll(" double ", " float8 ")
.replaceAll(" datetime ", " timestamp(6) ")
.replaceAll("DEFAULT 00000000000", "DEFAULT 0")
.replaceAll("unsigned zerofill", "")
.replaceAll("bigint\\([0-9]*\\) NOT NULL AUTO_INCREMENT", "BIGSERIAL PRIMARY KEY")
.replaceAll("bigint\\([0-9]*\\)", "INT8");
try {
Statements statements = CCJSqlParserUtil.parseStatements(ddl);
statements.getStatements()
.stream()
.map(statement -> (CreateTable) statement).forEach(ct -> {
Table table = ct.getTable();
List<ColumnDefinition> columnDefinitions = ct.getColumnDefinitions();
List<String> comments = new ArrayList<>();
List<ColumnDefinition> collect = columnDefinitions.stream()
.peek(columnDefinition -> {
List<String> columnSpecStrings = columnDefinition.getColumnSpecStrings();
int commentIndex = getCommentIndex(columnSpecStrings);
if (commentIndex != -1) {
int commentStringIndex = commentIndex + 1;
String commentString = columnSpecStrings.get(commentStringIndex);
String commentSql = genCommentSql(table.toString(), columnDefinition.getColumnName(), commentString);
comments.add(commentSql);
columnSpecStrings.remove(commentStringIndex);
columnSpecStrings.remove(commentIndex);
}
columnDefinition.setColumnSpecStrings(columnSpecStrings);
}).collect(Collectors.toList());
ct.setColumnDefinitions(collect);
String createSQL = ct.toString()
.replaceAll("`", "\"")
.replaceAll("BIGINT UNIQUE NOT NULL AUTO_INCREMENT", "BIGSERIAL PRIMARY KEY")
.replaceAll("BIGINT NULL AUTO_INCREMENT", "BIGSERIAL PRIMARY KEY")
.replaceAll("BIGINT NOT NULL AUTO_INCREMENT", "BIGSERIAL PRIMARY KEY")
.replaceAll("INT NOT NULL AUTO_INCREMENT", "BIGSERIAL PRIMARY KEY")
.replaceAll("INT NULL AUTO_INCREMENT", "BIGSERIAL PRIMARY KEY")
.replaceAll("IF NOT EXISTS", "")
.replaceAll("TINYINT", "SMALLINT")
.replaceAll("DATETIME", "TIMESTAMP")
.replaceAll(", PRIMARY KEY \\(\"id\"\\)", "");
// 如果存在表注释
if (createSQL.contains("COMMENT")) {
createSQL = createSQL.substring(0, createSQL.indexOf("COMMENT"));
}
StringBuilder postgresqlDdl = new StringBuilder();
postgresqlDdl.append(createSQL + ";");
comments.forEach(t -> postgresqlDdl.append(t.replaceAll("`", "\"") + ";"));
ddlChangeInfo.setTargetDdl(postgresqlDdl.toString());
});
} catch (Exception e) {
log.error(e.getMessage(), e);
}
}
}
/**
* 获得注释的下标
*
* @param columnSpecStrings columnSpecStrings
* @return 下标
*/
private static int getCommentIndex(List<String> columnSpecStrings) {
for (int i = 0; i < columnSpecStrings.size(); i++) {
if ("COMMENT".equalsIgnoreCase(columnSpecStrings.get(i))) {
return i;
}
}
return -1;
}
/**
* 生成COMMENT语句
*
* @param table 表名
* @param column 字段名
* @param commentValue 描述文字
* @return COMMENT语句
*/
private static String genCommentSql(String table, String column, String commentValue) {
return String.format("COMMENT ON COLUMN %s.%s IS %s", table, column, commentValue);
}
}
其他类型的转换待补充
3 测试类
MySQL->PostgreSQL
import net.sf.jsqlparser.JSQLParserException;
import java.util.ArrayList;
import java.util.List;
public class MysqlDdl2PgDdlTestMain {
public static void main(String[] args) throws JSQLParserException {
List<DdlChangeInfo> ddlChangeInfoList = new ArrayList<>();
DdlChangeInfo ddlChangeInfo0 = new DdlChangeInfo();
ddlChangeInfo0.setSourceDdl("CREATE TABLE `t_order` (" +
" `id` int(11) NOT NULL AUTO_INCREMENT COMMENT '主键'," +
" `goods_id` int(11) DEFAULT NULL COMMENT '商品id'," +
" `goods_name` varchar(255) DEFAULT NULL COMMENT '商品名称'," +
" `dealer_id` int(11) DEFAULT NULL COMMENT '经销商id'," +
" `dealer_name` varchar(255) DEFAULT NULL COMMENT '经销商名称'," +
" `goods_num` int(11) DEFAULT NULL COMMENT '商品数量'," +
" `amount` decimal(10,2) DEFAULT NULL COMMENT '折前金额'," +
" `discount_amount` decimal(10,2) DEFAULT NULL COMMENT '折后金额'," +
" `discount` double DEFAULT NULL COMMENT '折扣'," +
" `order_time` datetime DEFAULT NULL COMMENT '订单时间'," +
" `status` int(11) DEFAULT NULL COMMENT '订单状态,0:未支付 1:已支付'," +
" `create_time` datetime DEFAULT NULL COMMENT '创建时间'," +
" `update_time` datetime DEFAULT NULL COMMENT '更新时间'," +
" PRIMARY KEY (`id`)" +
");");
ddlChangeInfo0.setTableName("t_order");
ddlChangeInfoList.add(ddlChangeInfo0);
DdlChangeInfo ddlChangeInfo1 = new DdlChangeInfo();
ddlChangeInfo1.setSourceDdl("CREATE TABLE `t_order_1` (" +
" `id` int(11) NOT NULL AUTO_INCREMENT COMMENT '主键'," +
" `goods_id` int(11) DEFAULT NULL COMMENT '商品id'," +
" `goods_name` varchar(255) DEFAULT NULL COMMENT '商品名称'," +
" `discount` double DEFAULT NULL COMMENT '折扣'," +
" `order_time` datetime DEFAULT NULL COMMENT '订单时间'," +
" `status` int(11) DEFAULT NULL COMMENT '订单状态,0:未支付 1:已支付'," +
" `create_time` datetime DEFAULT NULL COMMENT '创建时间'," +
" `update_time` datetime DEFAULT NULL COMMENT '更新时间'," +
" PRIMARY KEY (`id`)" +
");");
ddlChangeInfo1.setTableName("t_order_1");
ddlChangeInfoList.add(ddlChangeInfo1);
System.out.println("init:" + ddlChangeInfoList);
Mysql2PosgresqlDdlChangeStartgy mysql2PosgresqlDdlChangeStartgy = new Mysql2PosgresqlDdlChangeStartgy();
mysql2PosgresqlDdlChangeStartgy.change(ddlChangeInfoList);
System.out.println("changed:" + ddlChangeInfoList);
}
}
其他类型待待补充
2.2 基于dbswitch实现
1 介绍
dbswitch是一款数据库迁移工具,它可以帮助用户轻松地将一个数据库系统中的数据迁移到另一个数据库系统中。它支持多种常见的关系型数据库系统,例如MySQL、Oracle、SQL Server、PostgreSQL等。dbswitch提供了丰富的转换选项和功能,可以帮助用户解决在不同数据库系统之间存在的格式差异、数据类型转换、编码转换等问题,确保迁移后的数据准确、完整、一致性。dbswitch的优势在于它的灵活性和可定制性。用户可以根据自己的需求和环境,选择不同的转换方式和配置选项,以便更好地控制迁移过程和结果。
2 实践
待补充
参考文档:
DBSwitch阉割版实现异构数据库表结构同步_程序员小王java的博客-CSDN博客