根据mysql元数据还原hive建表语句

概述

  • 公司集群崩了,hive访问不上,想要建表语句
  • 功能是一次性的(不要有下次了),就用python写了

适用场景

  • Hive版本:Hive release version 1.1.0 其他自行测试
  • CDH版本:1.1.0-cdh5.12.0 其他自行测试
  • 一般的hive表,不带分桶的,分桶表的没做
  • 压缩的也没做,所以有压缩表的建表语句也不完整

前期准备

  • 要对mysql里的hive数据库结构有一定的了解,请访问以下网址:Hive的元数据表结构详解
  • mysql的链接信息根据自己的配置更改

代码

#!/usr/bin/env python
#-*- coding:utf8 -*-
# 从mysql中提取hive建表语句
import os
import fileinput
import datetime
import mysql.connector

def hive_create_table():
    #conn = mysql.connector.connect(host="11.11.11.11",user='root',passwd='root',database='hive')
    conn = mysql.connector.connect(host="localhost",user='root',database='hive')
    mycursor = conn.cursor()
     
    # 获取DB_ID
    select_DB_ID = "select DB_ID from DBS;"
    mycursor.execute(select_DB_ID)
    result_DB_ID = mycursor.fetchall()
    fo = open("foo.txt", "w")
    for dir_DB_ID in result_DB_ID :
        # 获取数据库名
        DB_ID = str(dir_DB_ID)[1:].split(',')[0]
      	print(DB_ID)
        select_DB_NAME = "select NAME from DBS where DB_ID="+DB_ID+";"
        print(select_DB_NAME )
        mycursor.execute(select_DB_NAME)
        result_DB_NAME = mycursor.fetchone()
        fo.write("\n===========数据库:"+str(result_DB_NAME).split('\'')[1]+"===========\n")
      	print(result_DB_NAME)
        # 获取表名
        select_table_name_sql = "select TBL_NAME from TBLS where DB_ID="+DB_ID+";"
        mycursor.execute(select_table_name_sql)
        result_table_names = mycursor.fetchall()
        for table_name in result_table_names :
            fo.write("\nCREATE TABLE `"+str(table_name).split('\'')[1]+"`(\n")
            # 根据表名获取SD_ID
            select_table_SD_ID = "select SD_ID from TBLS where tbl_name='"+str(table_name).split('\'')[1]+"' and DB_ID="+DB_ID+";"
            print(select_table_SD_ID)
            mycursor.execute(select_table_SD_ID)
            result_SD_ID = mycursor.fetchone()
	          print(result_SD_ID )
            # 根据SD_ID获取CD_ID
            SD_ID=str(result_SD_ID)[1:].split(',')[0]
            select_table_CD_ID = "select CD_ID from SDS where SD_ID="+str(result_SD_ID)[1:].split(',')[0]+";"
            print(select_table_CD_ID)
            mycursor.execute(select_table_CD_ID)
            result_CD_ID = mycursor.fetchone()
            print(result_CD_ID)        
            # 根据CD_ID获取表的列
            CD_ID=str(result_CD_ID)[1:].split(',')[0]
            select_table_COLUMN_NAME = "select COLUMN_NAME,TYPE_NAME from COLUMNS_V2 where CD_ID="+str(result_CD_ID)[1:].split(',')[0]+" order by INTEGER_IDX;"
            print(select_table_COLUMN_NAME)
            mycursor.execute(select_table_COLUMN_NAME)
            result_COLUMN_NAME = mycursor.fetchall()
            print(result_COLUMN_NAME)        
       	    index=0
            for col,col_type in result_COLUMN_NAME:
                print(col)
                print(col_type)
         	    	print(len(result_COLUMN_NAME) )
            		# 写入表的列和列的类型到文件
                fo.write("  `"+str(col)+"`  "+str(col_type))
                if index < len(result_COLUMN_NAME)-1:
	                 index = index + 1
                   fo.write(",\n")
            		elif index == len(result_COLUMN_NAME)-1:
                   fo.write(")")
            # 根据表名获取TBL_ID
            select_table_SD_ID = "select TBL_ID from TBLS where tbl_name='"+str(table_name).split('\'')[1]+"' and DB_ID="+DB_ID+";"
            print(select_table_SD_ID)
            mycursor.execute(select_table_SD_ID)
            result_TBL_ID = mycursor.fetchone()
       	    print(result_TBL_ID)
            # 根据TBL_ID获取分区信息
            select_table_PKEY_NAME_TYPE = "select PKEY_NAME,PKEY_TYPE from PARTITION_KEYS where TBL_ID="+str(result_TBL_ID)[1:].split(',')[0]+" order by INTEGER_IDX;"
            print(select_table_PKEY_NAME_TYPE)
            mycursor.execute(select_table_PKEY_NAME_TYPE)
            result_PKEY_NAME_TYPE = mycursor.fetchall()
      	    print(result_PKEY_NAME_TYPE)
            if len(result_PKEY_NAME_TYPE) > 0:
            #if result_PKEY_NAME_TYPE is not None:
               fo.write("\nPARTITIONED BY (\n")
            #elif len(result_PKEY_NAME_TYPE) == 0:
            else :
               fo.write("\n")
            i=0
            for pkey_name,pkey_type in result_PKEY_NAME_TYPE:
                fo.write("  `"+str(pkey_name)+"`  "+str(pkey_type))
                if i < len(result_PKEY_NAME_TYPE)- 1:
	                 i = i + 1
                   fo.write(",\n")
            		elif i == len(result_PKEY_NAME_TYPE) - 1:
                   fo.write(")\n")
            # 根据SD_ID和CD_ID获取SERDE_ID
            select_SERDE_ID = "select SERDE_ID from SDS where SD_ID="+SD_ID+" and CD_ID="+CD_ID+";"
            print(select_SERDE_ID)
            mycursor.execute(select_SERDE_ID)
            result_SERDE_ID = mycursor.fetchone()
       	    print(result_SERDE_ID)
            # 根据SERDE_ID获取PARAM_VALUE(列分隔符)
            select_PARAM_VALUE = "select PARAM_VALUE from SERDE_PARAMS where SERDE_ID="+str(result_SERDE_ID)[1:].split(",")[0]+" and PARAM_KEY='field.delim';"
            print(select_PARAM_VALUE)
            mycursor.execute(select_PARAM_VALUE)
            result_PARAM_VALUE = mycursor.fetchone()
      	    print(result_PARAM_VALUE)
            if result_PARAM_VALUE is not None:
               fo.write("ROW FORMAT DELIMITED\n")
               fo.write("FIELDS TERMINATED BY '"+str(result_PARAM_VALUE).split('\'')[1]+"'\n")
            # 根据SD_ID和CD_ID获取输入输出格式
            select_table_STORE_FORMAT = "select INPUT_FORMAT from SDS where SD_ID="+SD_ID+" and CD_ID="+CD_ID+";"
            print(select_table_STORE_FORMAT)
            mycursor.execute(select_table_STORE_FORMAT)
            result_table_STORE_FORMAT= mycursor.fetchall()
      	    print(result_table_STORE_FORMAT)
            for store_format in result_table_STORE_FORMAT:
                if "Orc" in str(store_format):
                   fo.write("STORED AS ORC;\n")
                elif "RCFile" in str(store_format):
                   fo.write("STORED AS RCFILE;\n")
                else :
                   fo.write("STORED AS TEXTFILE;\n")
                   #fo.write(";\n")

    fo.close()
hive_create_table()

转载于:https://my.oschina.net/u/4017963/blog/3035340

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值