使用python导出Hive1.1版本的所有库表建表语句DDL
# -*- coding:utf-8 -*-
from pyhive import hive
def generate_create_tabel_ddl(table_info):
filter_content = {'TBLPROPERTIES', 'ROW', 'OUTPUTFORMAT', 'COLUMN_STATS_ACCURATE','WITH','LOCATION','STORED'}
filter_keys = {'COLUMN_STATS_ACCURATE', 'numFiles', 'numRows', 'rawDataSize', 'totalSize', 'transient_lastDdlTime',
'creator','create_time','serialization.forma','field.delim','last_modified_time','last_modified_by'
'colelction.delim','mapkey.delim','line.delim','serialization.null','serialization.null.format',
'last_modified_by','colelction.delim'}
table_info = [info[0] for info in table_info if not (
any(info[0].startswith(s) for s in filter_content) or
any(key in info[0] for key in filter_keys) or
any('org.apache.hadoop' in part for part in info[0].split()) or
any('hdfs://HDFS-HA/user/' in part for part in info[0].split())
)]
table_ddl = " \n".join(table_info)
return table_ddl
# 连接到 Hive
connection = hive.connect(host='your_host', port=10000, username='your_user_name')
# 创建 cursor
cursor = connection.cursor()
# 打开文件准备写入
with open("hive_tables_info.sql", "w", encoding="utf-8") as file:
# 获取所有的数据库
cursor.execute("SHOW DATABASES")
databases = cursor.fetchall()
# 遍历每个数据库
for database in databases:
database_name = database[0]
cursor.execute(f"DESCRIBE DATABASE EXTENDED {database_name} ")
databases_info = cursor.fetchall()
print(databases_info)
databases_ddl_sql = f"CREATE DATABASE IF NOT EXISTS {databases_info[0][0]} COMMENT '{databases_info[0][1]}' WITH DBPROPERTIES ('owner'='{databases_info[0][3]}', 'location'= '{databases_info[0][2]}' );"
file.write(f"##{database_name}\n {databases_ddl_sql}\n")
# 获取数据库的建表语句
print(database_name)
file.write(f"USE {database_name};\n")
cursor.execute(f"SHOW TABLES IN {database_name}")
tables = cursor.fetchall()
# 遍历每个表
for table in tables:
table_name = table[0]
print(table_name)
file.write(f"###Table: {table_name}\n")
# 获取表的建表语句
cursor.execute(f"SHOW CREATE TABLE {database_name}.{table_name}")
create_table_statement = cursor.fetchall()
table_ddl_sql = generate_create_tabel_ddl(create_table_statement)
print(table_ddl_sql)
file.write(f"{table_ddl_sql};\n\n")
# 关闭连接
cursor.close()
connection.close()