自定义udf把mysql的表结构自动转换成clickhouse的表结构

5 篇文章 0 订阅
4 篇文章 0 订阅

1,因业务需求,需要把mysql的数据同步到clickhouse,但是clickhouse建表语法比较繁琐,故写了一个udf来自动生成建表语句

1.0 相关pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>hdp-udf</groupId>
	<artifactId>hdp-udf</artifactId>
	<version>4.1</version>
	<packaging>jar</packaging>

	<name>hdp-udf</name>
	<url>http://maven.apache.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<hive.version>3.1.0</hive.version>
		<hadoop.version>3.1.0</hadoop.version>
	</properties>

	<dependencies>
		<!-- Hadoop/Hive artifacts are 'provided': the cluster supplies them at
		     runtime, so they must not be bundled into the assembly jar. -->
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-client</artifactId>
			<version>${hadoop.version}</version>
			<exclusions>
				<exclusion>
					<groupId>log4j</groupId>
					<artifactId>log4j</artifactId>
				</exclusion>
				<exclusion>
					<groupId>org.slf4j</groupId>
					<artifactId>slf4j-log4j12</artifactId>
				</exclusion>
			</exclusions>
			<scope>provided</scope>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-common</artifactId>
			<version>${hadoop.version}</version>
			<scope>provided</scope>
		</dependency>
		<dependency>
			<groupId>org.apache.hive</groupId>
			<artifactId>hive-exec</artifactId>
			<version>${hive.version}</version>
			<scope>provided</scope>
		</dependency>
		<dependency>
			<groupId>org.apache.hive</groupId>
			<artifactId>hive-jdbc</artifactId>
			<version>2.1.0.2.6.4.76-1</version>
			<scope>provided</scope>
		</dependency>
		<dependency>
			<groupId>org.apache.hive</groupId>
			<artifactId>hive-service</artifactId>
			<version>2.1.0</version>
			<scope>provided</scope>
		</dependency>

		<!-- 自研组件依赖 -->
		<dependency>
			<groupId>com.crgecent</groupId>
			<artifactId>crgt-util</artifactId>
			<version>1.1.0</version>
		</dependency>

		<dependency>
			<groupId>redis.clients</groupId>
			<artifactId>jedis</artifactId>
			<version>2.9.0</version>
		</dependency>
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.12</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>org.apache.kafka</groupId>
			<artifactId>kafka-clients</artifactId>
			<version>1.1.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.kafka</groupId>
			<artifactId>kafka_2.11</artifactId>
			<version>1.1.1</version>
		</dependency>
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.25</version>
		</dependency>
		<dependency>
			<groupId>ru.yandex.clickhouse</groupId>
			<artifactId>clickhouse-jdbc</artifactId>
			<version>0.2</version>
		</dependency>
		<!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
		<dependency>
			<groupId>com.alibaba</groupId>
			<artifactId>fastjson</artifactId>
			<version>1.2.62</version>
		</dependency>
	</dependencies>

	<build>
		<!-- ${project.version} instead of the deprecated bare ${version},
		     which Maven 3 warns about and may drop support for. -->
		<finalName>hiveudf_${project.version}</finalName>
		<resources>
			<resource>
				<directory>src/main/java</directory>
			</resource>
		</resources>
		<plugins>
			<plugin>
				<groupId>net.alchim31.maven</groupId>
				<artifactId>scala-maven-plugin</artifactId>
				<version>3.3.1</version>
				<executions>
					<execution>
						<id>scala-compile-first</id>
						<phase>process-resources</phase>
						<goals>
							<goal>add-source</goal>
							<goal>compile</goal>
						</goals>
					</execution>

					<execution>
						<phase>compile</phase>
						<goals>
							<goal>compile</goal>
							<goal>testCompile</goal>
						</goals>
					</execution>
				</executions>
			</plugin>
			<plugin>
				<artifactId>maven-assembly-plugin</artifactId>
				<version>3.0.0</version>
				<configuration>
					<descriptorRefs>
						<descriptorRef>jar-with-dependencies</descriptorRef>
					</descriptorRefs>
				</configuration>
				<executions>
					<execution>
						<id>make-assembly</id>
						<phase>package</phase>
						<goals>
							<goal>single</goal>
						</goals>
					</execution>
				</executions>
			</plugin>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<configuration>
					<source>8</source>
					<target>8</target>
				</configuration>
			</plugin>
		</plugins>
	</build>


	<repositories>
		<repository>
			<id>ownaliyunmaven</id>
			<name>own aliyun maven repository</name>
			<url>http://10.3.1.29:8081/repository/aliyun/</url>
		</repository>
		<repository>
			<id>ownmaven</id>
			<name>own maven repository</name>
			<url>http://10.3.1.29:8081/repository/maven-central/</url>
		</repository>

		<!-- Repository ids must not contain spaces: the id is used in local
		     metadata paths and for settings.xml mirror/credential matching. -->
		<repository>
			<id>hortonworks-releases</id>
			<name>Hortonworks Repository</name>
			<url>http://repo.hortonworks.com/content/repositories/releases/</url>
			<releases>
				<enabled>true</enabled>
			</releases>
			<snapshots>
				<enabled>false</enabled>
			</snapshots>
		</repository>

		<repository>
			<releases>
				<enabled>true</enabled>
			</releases>
			<snapshots>
				<enabled>true</enabled>
			</snapshots>
			<id>hortonworks.other</id>
			<name>Hortonworks Other Dependencies</name>
			<url>http://repo.hortonworks.com/content/groups/public</url>
		</repository>

	</repositories>

</project>
package com.crgt;


import com.crgecent.common.util.DateUtil;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.io.Text;

import java.sql.*;
import java.util.Date;


/**
 * Hive GenericUDF: given a MySQL JDBC URL and a table name, generates the
 * matching ClickHouse DDL — a local MergeTree table, a "_cls" Distributed
 * table — plus an INSERT ... SELECT ... FROM mysql(...) statement for the
 * initial data load. The input table name may carry a "_ck" suffix, which is
 * stripped for the ClickHouse table name.
 *
 * Reference: https://blog.csdn.net/xiao_jun_0820/article/details/76638198
 * Author: wpp
 */
public class Mysql2CKGenericUDF extends GenericUDF {

    /** MySQL JDBC driver class, loaded before opening the connection. */
    private static String driverName = "com.mysql.jdbc.Driver";

    /** Inspector for argument 0: the MySQL JDBC URL. */
    StringObjectInspector keyElementOI;

    /** Inspector for argument 1: the MySQL table name. */
    StringObjectInspector tableElementOI;

    /**
     * Evaluates the UDF: reads both string arguments and returns the
     * generated ClickHouse statements as a single Text value. On any error
     * while reading the MySQL schema, the exception is logged and whatever
     * was accumulated so far is returned (best-effort, as before).
     */
    @Override
    public Object evaluate(DeferredObject[] arg0) throws HiveException {

        String jdbcUrl = keyElementOI.getPrimitiveJavaObject(arg0[0].get());
        String tableName = tableElementOI.getPrimitiveJavaObject(arg0[1].get());

        if (jdbcUrl == null || tableName == null) {
            // Complete the previously truncated error message so the caller
            // knows which arguments are required.
            return new Text("获取参数错误,需要 jdbcUrl 和 tableName 两个参数");
        }
        System.out.println("输入值 : " + jdbcUrl);

        StringBuffer res = new StringBuffer();
        try {
            res.append("\n========================mysql转成ck的建表语句 " + tableName + " ==================\n");
            String ckConfInfo = getCKCreateTableInfo(jdbcUrl, tableName);
            res.append(ckConfInfo);

        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("获取结果异常:" + e.getMessage());
        }

        return new Text(res.toString());
    }

    /**
     * Connects to MySQL, inspects the table via SHOW CREATE TABLE / DESCRIBE,
     * and builds a printable string containing:
     *   1. the original MySQL DDL (echoed for reference),
     *   2. a ClickHouse MergeTree CREATE TABLE (columns Nullable except `id`),
     *   3. a ClickHouse Distributed CREATE TABLE with a "_cls" suffix,
     *   4. an INSERT INTO ... SELECT ... FROM mysql(...) load statement.
     *
     * @param cconn_str MySQL JDBC URL of the form
     *                  jdbc:mysql://host:port/db?user=xxx&amp;password=yyy
     * @param tableName MySQL table name; a trailing "_ck" is stripped for the
     *                  ClickHouse table name
     * @return the generated statements as one string
     * @throws SQLException if the driver is missing or any query fails
     */
    public static String getCKCreateTableInfo(String cconn_str, String tableName) throws SQLException {

        String databaseName = "rt_dwd";
        String dbTableName = tableName;
        try {
            Class.forName(driverName);
        } catch (ClassNotFoundException e) {
            // Never System.exit() inside a UDF: it would terminate the whole
            // HiveServer2 / task JVM. Propagate the failure to the caller.
            throw new SQLException("MySQL driver not found: " + driverName, e);
        }

        String conn_str = cconn_str + "&useUnicode=true&characterEncoding=UTF8";

        StringBuffer result = new StringBuffer();
        StringBuffer resultCls = new StringBuffer();
        StringBuffer insertSql = new StringBuffer();

        // Split the JDBC URL into [host:port, database, user, password] for
        // ClickHouse's mysql() table function. Assumes the exact URL shape
        // documented above.
        String[] jdbcUrlArr = cconn_str.replaceAll("jdbc:mysql://", "")
                .replaceAll("\\?", "/")
                .replaceAll("user=", "")
                .replaceAll("&password=", "/")
                .split("/");

        // try-with-resources: the original version leaked the connection,
        // statement and result sets.
        try (Connection con = DriverManager.getConnection(conn_str);
             Statement stmt = con.createStatement()) {

            String createSql = "show create  table " + dbTableName;
            try (ResultSet res1 = stmt.executeQuery(createSql)) {
                result.append("==============mysql原表的格式=============\n");
                while (res1.next()) {
                    // Column 2 of SHOW CREATE TABLE holds the DDL text.
                    result.append(res1.getString(2) + "\n");
                }
            }

            String sql = "describe  " + dbTableName;
            System.out.println("Running: " + sql);

            String ckTable = tableName.replaceAll("_ck$", "");
            String ckDbTableName = databaseName + "." + ckTable;

            result.append("==============请确定主键和date类型的列,并把Nullable去掉=============\n");
            result.append("CREATE TABLE " + ckDbTableName + " on cluster crm_4shards_1replicas (\n");
            resultCls.append("CREATE TABLE " + ckDbTableName + "_cls on cluster crm_4shards_1replicas (\n");

            insertSql.append("INSERT INTO " + ckDbTableName + "_cls \nselect \n");

            try (ResultSet res = stmt.executeQuery(sql)) {
                while (res.next()) {
                    String dataKey = res.getString(1);
                    String dataType = res.getString(2);
                    String ckDataType = Utils.getMysqlParseType(dataType.toLowerCase());

                    if (dataKey.equals("id")) {
                        // The MergeTree key column must not be Nullable.
                        result.append(" `" + dataKey + "` " + ckDataType + ",\n");
                        resultCls.append(" `" + dataKey + "` " + ckDataType + ",\n");
                    } else {
                        result.append(" `" + dataKey + "` Nullable(" + ckDataType + "),\n");
                        resultCls.append(" `" + dataKey + "` Nullable(" + ckDataType + "),\n");
                    }

                    insertSql.append(dataKey + ",\n");
                }
            }

            // Synthetic partition column, filled from update_time/create_time.
            result.append(" `" + "ck_update_date" + "` Date,\n");
            resultCls.append(" `" + "ck_update_date" + "` Date,\n");
            insertSql.append("if(update_time is null ,toDate(create_time),toDate(update_time)) \n");

            // Drop the trailing ",\n" before closing each column list.
            result = new StringBuffer(result.substring(0, result.length() - 2));
            resultCls = new StringBuffer(resultCls.substring(0, resultCls.length() - 2));

            result.append("\n)ENGINE = MergeTree(ck_update_date, id, 8192);\n\n");
            resultCls.append("\n)ENGINE = Distributed(crm_4shards_1replicas," + databaseName + " ," + ckTable + ",rand());\n");
            insertSql.append("FROM mysql('" + jdbcUrlArr[0] + "', '" + jdbcUrlArr[1] + "', '" + dbTableName + "', '" + jdbcUrlArr[2] + "', '" + jdbcUrlArr[3] + "') ;\n");

            result.append(resultCls);

            result.append("=============insert into ================\n");
            result.append(insertSql.toString());
        }

        return result.toString();
    }

    /** Shown in EXPLAIN / error messages to describe this UDF's call shape. */
    @Override
    public String getDisplayString(String[] arg0) {
        return "Mysql2ckTableCreateGenericUDF(jdbcUrl,tableName)";
    }

    /**
     * Validates that exactly two string arguments are supplied and caches
     * their inspectors; the UDF returns a writable string.
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arg0) throws UDFArgumentException {

        if (arg0.length != 2) {
            throw new UDFArgumentException(" Expecting  two  arguments: jdbcUrl ,tableName");
        }

        // 1. First argument must be a string (the JDBC URL).
        ObjectInspector key = arg0[0];

        if (!(key instanceof StringObjectInspector)) {
            throw new UDFArgumentException("one argument  must be a string");
        }
        this.keyElementOI = (StringObjectInspector) key;


        // 2. Second argument must be a string (the table name).
        ObjectInspector table = arg0[1];
        if (!(table instanceof StringObjectInspector)) {
            throw new UDFArgumentException("one argument  must be a string");
        }
        this.tableElementOI = (StringObjectInspector) table;

        return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    }


}

1.2 utils

package com.crgt;

import java.util.regex.Pattern;

/**
 * Type-mapping helpers: translate Hive and MySQL column type names into
 * ClickHouse column types for the generated DDL.
 *
 * @Author: wpp
 * @Date: 2019/10/21 19:41
 */
public class Utils {

    // Default HiveServer2 endpoint used by other UDFs in this project.
    public static String HIVE_JDBC_URL = "jdbc:hive2://xx.xx.xx.xx:10000/default";

    /**
     * Maps a Hive column type to a ClickHouse type.
     *
     * @param oriDataType Hive type name, any case
     * @return ClickHouse type name, or the sentinel "999999999" for an
     *         unrecognized type (kept from the original so generated DDL
     *         visibly flags unmapped columns)
     */
    public static String getParseType(String oriDataType) {

        // Bug fix: the input is lower-cased, but the original switch had
        // "TINYINT"/"SMALLINT" cases in upper case — those branches were
        // unreachable and such columns fell through to the sentinel.
        switch (oriDataType.toLowerCase()) {
            case "boolean":
            case "tinyint":
            case "smallint":
            case "int":
            case "bigint":
                return "Int64";
            case "float":
            case "double":
            case "decimal":
                return "Float64";
            case "string":
            case "datetime":
            case "timestamp":
                return "String";
            default:
                return "999999999";
        }
    }

    /**
     * Maps a MySQL column type (e.g. "varchar(25)", "bigint(20) unsigned")
     * to a ClickHouse type. The checks are ordered: string-ish types first,
     * then unsigned integers, then signed integers, then floating point.
     *
     * @param oriDataType MySQL type string, expected lower case
     * @return ClickHouse type name, or the sentinel "9999999999999" for an
     *         unrecognized type
     */
    public static String getMysqlParseType(String oriDataType) {

        String dataType;

        // contains("int") already covers "bigint" and "tinyint", so the
        // original's three separate regexes collapse to one check each.
        if (oriDataType.contains("varchar")
                || oriDataType.contains("datetime")
                || oriDataType.contains("time")
                || oriDataType.contains("text")) {
            dataType = "String";
        } else if (oriDataType.contains("int") && oriDataType.contains("unsigned")) {
            dataType = "UInt64";
        } else if (oriDataType.contains("int")) {
            dataType = "Int64";
        } else if (oriDataType.contains("float")
                || oriDataType.contains("double")
                || oriDataType.contains("decimal")) {
            dataType = "Float64";
        } else {
            dataType = "9999999999999";
        }

        return dataType;
    }




}

 

2,创建hive的udf函数

create function mysqlinfo as 'com.crgt.Mysql2CKGenericUDF' using jar 'hdfs:///hiveudf/hiveudf_3.8-jar-with-dependencies.jar';

3,在hive上执行语句


select mysqlinfo("jdbc:mysql://10.xx.xx.xx:3306/order_center?user=user123&password=passwd123",'mysql_table_name');

4,执行结果如下:


========================mysql转成ck的建表语句 mysql_table_name ==================
==============mysql原表的格式=============
CREATE TABLE `mysql_table_name` (
  `id` varchar(25) NOT NULL COMMENT '自定义主键',
  `order_id` varchar(25) NOT NULL COMMENT '',
  `p_order_id` varchar(25) NOT NULL COMMENT '',
  `user_no` varchar(25) NOT NULL COMMENT '用户id',
  `wx_openid` varchar(100) DEFAULT NULL COMMENT '',
  `machine_id` varchar(50) DEFAULT NULL COMMENT '',
  `scene_type` tinyint(2) DEFAULT NULL COMMENT '',
  `order_type` tinyint(2) DEFAULT NULL COMMENT '',
  `merchant_shop_id` varchar(50) DEFAULT NULL COMMENT '',
  `shop_name` varchar(50) DEFAULT NULL COMMENT '',
  `site_name` varchar(50) DEFAULT NULL COMMENT '车站',
  `run_no` varchar(10) DEFAULT NULL COMMENT '',
  `section_no` varchar(5) DEFAULT NULL COMMENT '',
  `seat` varchar(10) DEFAULT NULL COMMENT '',
  `opening_time` datetime DEFAULT NULL COMMENT '',
  `riding_time` datetime DEFAULT NULL COMMENT '',
  `delivery_type` tinyint(2) DEFAULT NULL COMMENT ',0:自取,1:送达',
  `user_name` varchar(50) DEFAULT NULL COMMENT '用户名',
  `brands_name` varchar(200) DEFAULT NULL COMMENT '品牌名',
  `mobile` varchar(50) DEFAULT NULL COMMENT '',
  `invoice_type` tinyint(2) DEFAULT NULL COMMENT '0:不开发票,1:开票',
  `delivery_range` varchar(50) DEFAULT NULL COMMENT '',
  `refund_expire_time` datetime DEFAULT NULL COMMENT '',
  `box_count` int(5) DEFAULT NULL COMMENT '',
  `ware_count` int(5) DEFAULT NULL COMMENT '',
  `remark` varchar(150) DEFAULT NULL COMMENT '备注',
  `delivery_start_time` datetime DEFAULT NULL COMMENT '',
  `delivery_end_time` datetime DEFAULT NULL COMMENT '',
  `create_time` datetime DEFAULT NULL COMMENT '创建时间',
  `update_time` datetime DEFAULT NULL COMMENT '修改时间',
  PRIMARY KEY (`id`,`user_no`) USING BTREE,
  KEY `idx_order_id` (`order_id`,`user_no`) USING BTREE,
  KEY `idx_order_pid` (`p_order_id`,`user_no`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='点餐表' shardkey=user_no
==============请确定主键和date类型的列,并把Nullable去掉=============

CREATE TABLE rt_dwd.mysql_table_name on cluster crm_4shards_1replicas (
 `id` String,
 `order_id` Nullable(String),
 `p_order_id` Nullable(String),
 `user_no` Nullable(String),
 `wx_openid` Nullable(String),
 `machine_id` Nullable(String),
 `scene_type` Nullable(Int64),
 `order_type` Nullable(Int64),
 `merchant_shop_id` Nullable(String),
 `shop_name` Nullable(String),
 `site_name` Nullable(String),
 `run_no` Nullable(String),
 `section_no` Nullable(String),
 `seat` Nullable(String),
 `opening_time` Nullable(String),
 `riding_time` Nullable(String),
 `delivery_type` Nullable(Int64),
 `user_name` Nullable(String),
 `brands_name` Nullable(String),
 `mobile` Nullable(String),
 `invoice_type` Nullable(Int64),
 `delivery_range` Nullable(String),
 `refund_expire_time` Nullable(String),
 `box_count` Nullable(Int64),
 `ware_count` Nullable(Int64),
 `remark` Nullable(String),
 `delivery_start_time` Nullable(String),
 `delivery_end_time` Nullable(String),
 `create_time` Nullable(String),
 `update_time` Nullable(String),
 `ck_update_date` Date
)ENGINE = MergeTree(ck_update_date, id, 8192);

CREATE TABLE rt_dwd.mysql_table_name_cls on cluster crm_4shards_1replicas (
 `id` String,
 `order_id` Nullable(String),
 `p_order_id` Nullable(String),
 `user_no` Nullable(String),
 `wx_openid` Nullable(String),
 `machine_id` Nullable(String),
 `scene_type` Nullable(Int64),
 `order_type` Nullable(Int64),
 `merchant_shop_id` Nullable(String),
 `shop_name` Nullable(String),
 `site_name` Nullable(String),
 `run_no` Nullable(String),
 `section_no` Nullable(String),
 `seat` Nullable(String),
 `opening_time` Nullable(String),
 `riding_time` Nullable(String),
 `delivery_type` Nullable(Int64),
 `user_name` Nullable(String),
 `brands_name` Nullable(String),
 `mobile` Nullable(String),
 `invoice_type` Nullable(Int64),
 `delivery_range` Nullable(String),
 `refund_expire_time` Nullable(String),
 `box_count` Nullable(Int64),
 `ware_count` Nullable(Int64),
 `remark` Nullable(String),
 `delivery_start_time` Nullable(String),
 `delivery_end_time` Nullable(String),
 `create_time` Nullable(String),
 `update_time` Nullable(String),
 `ck_update_date` Date
)ENGINE = Distributed(crm_4shards_1replicas,rt_dwd ,mysql_table_name,rand());
=============insert into ================
INSERT INTO rt_dwd.mysql_table_name_cls 
select 
id,
order_id,
p_order_id,
user_no,
wx_openid,
machine_id,
scene_type,
order_type,
merchant_shop_id,
shop_name,
site_name,
run_no,
section_no,
seat,
opening_time,
riding_time,
delivery_type,
user_name,
brands_name,
mobile,
invoice_type,
delivery_range,
refund_expire_time,
box_count,
ware_count,
remark,
delivery_start_time,
delivery_end_time,
create_time,
update_time,
if(update_time is null ,toDate(create_time),toDate(update_time)) 
FROM mysql('xxxx:3306', 'order_center', 'mysql_table_name', 'user123', 'passwd123') ;

 

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值