mysql的DDL语句如下:
CREATE TABLE `gg_zr` (
`id` bigint(20) NOT NULL COMMENT '责任信息表主键id',
`zrdm` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '责任代码',
`zrmc` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '责任名称',
`delflag` char(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT '0' COMMENT '删除标志(0.存在;1.删除)',
`version` int(11) DEFAULT '1' COMMENT '版本号',
`createby` bigint(20) DEFAULT NULL COMMENT '创建人',
`createtime` datetime DEFAULT NULL COMMENT '创建时间',
`updateby` bigint(20) DEFAULT NULL COMMENT '修改人',
`updatetime` datetime DEFAULT NULL COMMENT '修改时间',
PRIMARY KEY (`id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC COMMENT='责任信息表';
解析oracle表生成高斯内表入口:Mysql2GaussCreateMana.scala
package com.tpiods.sqoop.mysql2gauss
import java.io.{File, FileInputStream, PrintWriter}
/**
* 读取mysql建表语句,生成高斯内表
* 读取oceanbase建表语句,生成高斯内表
* 仅93行提取主键的逻辑,mysql与oceanbase有差异
*/
object Mysql2GaussCreateMana {
def main(args: Array[String]): Unit = {
// 指定输入sql语句的文件
val input = "ods_etl/src/main/resources/work0409_test1/test1_tab.sql"
val lines = scala.io.Source.fromInputStream(new FileInputStream(input)).getLines()
val str = readSql(lines)
val result = replaceSql(str)
// 读上一步输出的文件
val str2 = addTeacCols3(result)
// 指定输出sql语句的文件(增加技术字段)
val output2 = "ods_etl/src/main/resources/work0409_test1/test1_tab_mana.sql"
val writer2 = new PrintWriter(new File(output2))
writer2.write(str2)
writer2.close()
}
/**
*
* @param lines 从文件读入的源系统建表语句
* @return 返回转换后的建表语句
*/
def readSql(lines: Iterator[String]): String = {
var first = 0
val sb = new StringBuilder
var pkName: String = null
for (line <- lines if line != null) {
if(line.startsWith("CREATE")) {
val tabName = line.split("\\s+")(2)
first += 1
if (sb.nonEmpty) {
// 不按主键做hash,则注释掉下面这段代码
sb.append(
s"""
|)
|with (orientation=column, compression=low)
|distribute by hash${pkName};""".stripMargin)
sb.append(
s"""
|
|set search_path = ods;
|drop table if exists ${tabName};
|create table if not exists ${tabName} (\t
|""".stripMargin)
}
else {
sb.append(
s"""
|
|set search_path = tpods;
|drop table if exists ${tabName};
|create table if not exists ${tabName} (\t
|""".stripMargin)
}
}
if (line.startsWith(" `")) {
val split = line.split("\\s+")
if (first == 1) {
val ln = (split(1) + "\t" + split(2)).toLowerCase
if (ln.indexOf("char(") > 0) {
val l1 = ln.split("\\(")(0)
val l2 = ln.split("\\(")(1)
val l3 = l2.substring(0, l2.length - 1)
val l4 = Integer.valueOf(l3) * 3
sb.append(l1).append("(").append(l4).append(")")
}
else sb.append(split(1)).append("\t").append(split(2))
}
else {
val ln = (split(1) + "\t" + split(2)).toLowerCase
if (ln.indexOf("char(") > 0) {
val l1 = ln.split("\\(")(0)
val l2 = ln.split("\\(")(1)
val l3 = l2.substring(0, l2.length - 1)
val l4 = Integer.valueOf(l3) * 3
sb.append(",").append(l1).append("(").append(l4).append(")")
}
else sb.append(",").append(split(1)).append("\t").append(split(2))
}
sb.append("\n")
first = 0
}
if (line.startsWith(" PRIMARY KEY")) {
//取mysql 主键
pkName = line.split("\\s+")(3)
}
}
sb +
s"""
|)
|WITH (orientation=column, compression=low)
|distribute by hash${pkName};
|""".stripMargin
// 不按主键做hash,打开下面这段代码
// sb.toString()
}
/**
*
* @param sql 读入转换后的sql
* @return 对字段类型进行替换,返回最终的sql
*/
def replaceSql(sql: String): String = {
val res = sql.toLowerCase.replaceAll("`", "")
.replaceAll("\\b" + "datetime" + "\\b", "date")
.replaceAll("tinyint\\(\\d*\\)", "int")
.replaceAll("smallint\\(\\d*\\)", "int")
.replaceAll("int\\(\\d*\\)", "int")
.replaceAll("bigint\\(\\d*\\)", "bigint")
.replaceAll("float\\(\\d*,\\d*\\)", "float")
.replaceAll("double\\(\\d*,\\d*\\)", "float")
.replaceAll("timestamp\\(\\d*\\)", "timestamp")
.replaceAll("date\\(\\d*\\)", "date")
.replaceAll("\\b" + "longtext" + "\\b", "text")
.replaceAll("\\b" + "longblob" + "\\b", "varchar")
.replaceAll(",\n", "\n")
.replaceAll("mediumtext", "text")
res + "\n"
}
/**
* 按主键做hash
* @param lines2 读入字段类型替换后的sql
* @return 字段后面增加技术字段
*/
def addTeacCols(lines2: String): String = {
lines2.replaceAll("\n\\)\nwith",
""",oper varchar
|,mtime numeric(38,5)
|,source_sys varchar
|,etl_time varchar
|,etl_date varchar
|)
|with""".stripMargin)
}
/**
* 不按主键做hash,调用这个方法
* @param lines2 读入字段类型替换后的sql
* @return 字段后面增加技术字段
*/
def addTeacCols2(lines2: String): String = {
lines2.replaceAll("\\);",
""",oper varchar
|,mtime numeric(38,5)
|,source_sys varchar
|,etl_time varchar
|,etl_date varchar
|);
|""".stripMargin)
}
/**
* 按主键做hash
* @param lines2 读入字段类型替换后的sql
* @return 字段前面增加技术字段
*/
def addTeacCols3(lines2: String): String = {
lines2.replaceAll("\t\n",
"""
| tabname varchar
|,oper varchar
|,mtime numeric(38,5)
|,source_sys varchar
|,etl_time varchar
|,etl_date varchar
|,""".stripMargin)
}
/**
* 不按主键做hash,调用这个方法
* @param lines2 读入字段类型替换后的sql
* @return 字段前面增加技术字段
*/
def addTeacCols4(lines2: String): String = {
lines2.replaceAll("\t\n",
"""
| tabname varchar
|,oper varchar
|,mtime numeric(38,5)
|,source_sys varchar
|,etl_time varchar
|,etl_date varchar
|,""".stripMargin)
}
}
运行以上代码,输出如下:
set search_path = ods;
drop table if exists gg_zr;
create table if not exists gg_zr (
tabname varchar
,oper varchar
,mtime numeric(38,5)
,source_sys varchar
,etl_time varchar
,etl_date varchar
,id bigint
,zr_dm varchar(96)
,zr_mc varchar(192)
,del_flag char(3)
,version int
,create_by bigint
,create_time date
,update_by bigint
,update_time date
)
with (orientation=column, compression=low)
distribute by hash(id);
解析oracle表生成表名字段主键配置入口:MysqlGeneTabColsPk.scala
package com.tpiods.sqoop.mysql2gauss
import java.io.{File, FileInputStream, PrintWriter}
/**
* 输入: 源系统建表语句
* 输出: 表名|列名...|主键
* 仅36行提取主键的逻辑,mysql与oceanbase有差异
*/
object MysqlGeneTabColsPk {
def main(args: Array[String]): Unit = {
// 指定输入sql语句的文件
val input = "ods_etl/src/main/resources/work0409_test1/test1_tab.sql"
val lines = scala.io.Source.fromInputStream(new FileInputStream(input)).getLines()
val str = readSql(lines)
// 指定输出sql语句的文件
val output1 = "ods_etl/src/main/resources/work0409_test1/test1_tab_tabcolspk.txt"
val writer = new PrintWriter(new File(output1))
writer.write(str)
writer.close()
}
def readSql(lines: Iterator[String]): String = {
val sb = new StringBuilder
for (tempString <- lines if tempString != null) {
if (tempString.startsWith("CREATE")) {
val tabName = tempString.split("TABLE `")(1)
sb.append("\n").append(tabName).append("|")
}
if (tempString.startsWith(" `")) {
val colName = tempString.split("\\s+")(1)
sb.append(colName).append(",")
}
if (tempString.startsWith(" PRIMARY KEY")) {
// var pkName = tempString.split("\\s+")(3)
//取oceanbase 主键
var pkName = tempString.replaceAll(" PRIMARY KEY ", "")
if (pkName.endsWith(",")) {
pkName = pkName.substring(0, pkName.length - 1).replaceAll(" ", "")
}
sb.append("|").append(pkName)
}
}
sb.toString().toLowerCase()
.replaceAll("` ", "")
.replaceAll("`", "")
.replaceAll("\\(", "")
.replaceAll("\\)", "")
.replaceAll(",\\|", "\\|")
.replaceAll(",\n", "\n")
}
}
运行以上代码,输出如下:
gg_zr|id,zr_dm,zr_mc,del_flag,version,create_by,create_time,update_by,update_time|id