使用python导出Hive1.1版本的所有库表建表语句DDL

使用python导出Hive1.1版本的所有库表建表语句DDL

# -*- coding:utf-8 -*-

from pyhive import hive


def generate_create_tabel_ddl(table_info):
    filter_content = {'TBLPROPERTIES', 'ROW', 'OUTPUTFORMAT', 'COLUMN_STATS_ACCURATE','WITH','LOCATION','STORED'}
    filter_keys = {'COLUMN_STATS_ACCURATE', 'numFiles', 'numRows', 'rawDataSize', 'totalSize', 'transient_lastDdlTime',
                   'creator','create_time','serialization.forma','field.delim','last_modified_time','last_modified_by'
                   'colelction.delim','mapkey.delim','line.delim','serialization.null','serialization.null.format',
                   'last_modified_by','colelction.delim'}
    table_info = [info[0] for info in table_info if not (
            any(info[0].startswith(s) for s in filter_content) or
            any(key in info[0] for key in filter_keys) or
            any('org.apache.hadoop' in part for part in info[0].split()) or
            any('hdfs://HDFS-HA/user/' in part for part in info[0].split())
    )]
    table_ddl = "  \n".join(table_info)
    return table_ddl


# 连接到 Hive
connection = hive.connect(host='your_host', port=10000, username='your_user_name')

# 创建 cursor
cursor = connection.cursor()

# 打开文件准备写入
with open("hive_tables_info.sql", "w", encoding="utf-8") as file:
    # 获取所有的数据库
    cursor.execute("SHOW DATABASES")
    databases = cursor.fetchall()

    # 遍历每个数据库
    for database in databases:
        database_name = database[0]
        cursor.execute(f"DESCRIBE DATABASE EXTENDED {database_name} ")
        databases_info = cursor.fetchall()
        print(databases_info)
        databases_ddl_sql = f"CREATE DATABASE IF NOT EXISTS {databases_info[0][0]} COMMENT  '{databases_info[0][1]}'  WITH DBPROPERTIES ('owner'='{databases_info[0][3]}', 'location'= '{databases_info[0][2]}' );"
        file.write(f"##{database_name}\n {databases_ddl_sql}\n")

        # 获取数据库的建表语句
        print(database_name)

        file.write(f"USE {database_name};\n")
        cursor.execute(f"SHOW TABLES IN {database_name}")
        tables = cursor.fetchall()

        # 遍历每个表
        for table in tables:
            table_name = table[0]
            print(table_name)
            file.write(f"###Table: {table_name}\n")

            # 获取表的建表语句
            cursor.execute(f"SHOW CREATE TABLE {database_name}.{table_name}")
            create_table_statement = cursor.fetchall()
            table_ddl_sql = generate_create_tabel_ddl(create_table_statement)
            print(table_ddl_sql)
            file.write(f"{table_ddl_sql};\n\n")

# 关闭连接
cursor.close()
connection.close()

  • 9
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值