mysql建表语句转为hive建表语句,mysql ddl转为hive ddl

把MySQL建表语句往里面放就行

 

import re

'''
公共内容
'''
original_content = '''PARTITIONED BY (
    `pt_platform` string)
ROW FORMAT SERDE
    'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
    WITH SERDEPROPERTIES (
    'field.delim' = '\\t',
    'line.delim' = '\\n',
    'serialization.format' = '\\t',
    'serialization.null.format' = '')
STORED AS TEXTFILE;'''

'''
取列表的第一个元素
'''
def get_first_element(lst, default_value=""):
    if lst:
        return lst[0]
    else:
        return default_value


'''
太邪门,.find("")函数找到返回0,找不到返回-1
ddl语句分析,拆分
'''
def ddl_analyse(ddl):
    lines = ddl.splitlines()
    # 表名
    tbname = ''
    # 表正则表达
    # tbname_pattern = r'CREATE TABLE `(\w+)`'
    tbname_pattern = r"CREATE TABLE `(\w+)`.`(\w+)`"
    # 字段列表
    columns = []
    # 字段正则表达
    column_pattern = r'(?<*,|\n)\s*`(\w+)`'
    # 注释列表
    comments = []
    # 注释正则表达
    comment_pattern = r"COMMENT \'(.*)\'"
    # 类型列表
    types = []
    # 表注释正则表达
    tb_comment_pattern = r"COMMENT='(.*)'"
    tb_comment = ''
    for line in lines:
        line = line.strip()
        words = line.split(" ")
        if "CREATE" in line.upper() and "TABLE" in line.upper():
            tbname = words[len(words)-2]
        elif ("NOT NULL" in line.upper() or "DEFAULT" in line.upper()) and "ENGINE=" not in line.upper():
            columns.append(words[0])
            comments.append(get_first_element(re.findall(comment_pattern, line)))
            types.append(change_type(words[1]))
        elif "ENGINE=" in line.upper() or "CHARSET=" in line.upper():
            tb_comment = get_first_element(re.findall(tb_comment_pattern, line))

    return tbname, tb_comment, columns, comments, types


'''
mysql字段类型转化成hive字段类型
# type: mysql => hive sql
'''
def change_type(line):
    if re.search(r"int\((\d+)\)", line) != None:
        return 'INT'
    elif re.search(r"decimal\((\d+),\s*(\d+)\)", line) != None:
        return next(re.finditer(r"decimal\((\d+),\s*(\d+)\)", line)).group()
    elif re.search(r'varchar', line) != None:
        return 'STRING'
    elif re.search(r'timestamp', line) != None:
        return 'STRING'
    elif re.search(r'text', line) != None:
        return 'STRING'
    elif re.search(r'datetime', line) != None:
        return 'STRING'
    elif re.search(r'date', line) != None:
        return 'STRING'
    elif re.search(r'char', line) != None:
        return 'STRING'
    else:
        return str(line)


"""
生成Hive建表语句
:return:
"""
def mysql_to_hsql(ddl):
    tbname, tb_comment, columns, comments, types = ddl_analyse(ddl)
    hive_ddl = "CREATE TABLE `stage_power`." + str(tbname + "(\n")
    add_sql = ''
    collect_mysql_query = 'select \n'
    ods_hive_query = 'select \n'
    for i in range(len(columns)):
        if (i == len(columns) - 1):
            hive_ddl = hive_ddl + "     " + str(columns[i]) + " " + str(
                change_type(types[i])) + " COMMENT \'" + str(comments[i]) + "\'\n"
            if types[i] == "STRING":
                collect_mysql_query = collect_mysql_query + "    ifnull(replace(replace({}, CHAR(13), ''), CHAR(10),  ''), '')".format(
                    columns[i]) + '\n'
                collect_mysql_query = collect_mysql_query + 'from {}'.format(tbname)
            else:
                collect_mysql = str(collect_mysql_query + "    " + columns[i]) + '\n'
                collect_mysql_query = collect_mysql + 'from {}'.format(tbname)
            add_sql += "ALTER TABLE {} ADD COLUMNS ({} {} COMMENT '{}') CASCADE;".format(tbname, columns[i], types[i],
                                                                                        comments[i]) + '\n'
            ods_hive_query = ods_hive_query + "    " +columns[i].replace('`','') + "\n"+"from stage_power."+tbname.replace('`','')

            break
        hive_ddl = hive_ddl + "     " + str(columns[i]) + " " + str(
            change_type(types[i])) + " COMMENT \'" + str(comments[i]) + "\',\n"

        #print(columns[i]+",")
        ods_hive_query = ods_hive_query + "    " + columns[i].replace('`','') + ","+ "\n"
        if types[i] == "STRING":
            collect_mysql_query = collect_mysql_query + "    ifnull(replace(replace({}, CHAR(13), ''), CHAR(10),  ''), ''),".format(
                columns[i]) + '\n'
        else:
            collect_mysql_query = str(collect_mysql_query + "    " + columns[i]) + ',\n'
        add_sql += "ALTER TABLE {} ADD COLUMNS ({} {} COMMENT '{}') CASCADE;".format(tbname, columns[i], types[i],
                                                                                     comments[i]) + '\n'

    hive_ddl = hive_ddl + ") COMMENT \'" + tb_comment + "\'\n"
    return hive_ddl + original_content, add_sql,collect_mysql_query.replace('`',''),ods_hive_query


if __name__ == '__main__':
    ddl = '''
CREATE TABLE `_detail_tb` (
  `code` int(11) NOT NULL AUTO_INCREMENT COMMENT '主键',
  `resettlement_code` int(11) NOT NULL COMMENT '安置合同code',
   PRIMARY KEY (`code`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=2945 DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC COMMENT='明细表'
    '''
    # tbname, tb_comment, columns, comments, types = ddl_analyse(ddl)
    hive_ddl, add_sql,collect_mysql,ods_hive_query = mysql_to_hsql(ddl)
    print(hive_ddl)
    print("=" * 100)
    print("=" * 100)
    print(add_sql)
    print("=" * 100)
    print("=" * 100)
    print(collect_mysql)
    print("=" * 100)
    print("=" * 100)
    print(ods_hive_query)

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值