hive迁移

#coding=utf-8
__author__ = 'Administrator'
import pymysql
import sys
reload(sys)
sys.setdefaultencoding("utf-8")


class GetTableinfo:
    #得到表列表
    def getTableList(self,database):
        conn = pymysql.connect(host='127.0.0.1', port=3306, user='pig', passwd='123456', db='hive', charset='utf8')
        import time
        time.sleep(0.01)
        courser=conn.cursor()
        #我们所有的表都是内部表
        courser.execute("select * from tbls where db_id=%s and tbl_type='MANAGED_TABLE' ",database)
        #courser.execute("select * from tbls where db_id=%s and tbl_type='EXTERNAL_TABLE' ",database)
        #得到所有内部部
        value=courser.fetchall()
        courser.close()
        conn.close()
        return  value
    #得到是否是分区表
    def ispartitions(self,tbl_id):
        conn = pymysql.connect(host='127.0.0.1', port=3306, user='pig', passwd='123456', db='hive', charset='utf8')
        import time
        time.sleep(0.01)
        courser=conn.cursor()
        courser.execute("select * from partition_keys where tbl_id='%s '  ORDER BY INTEGER_IDX ",tbl_id)
        value=courser.fetchall()
        courser.close()
        conn.close()
        return value

    def getClouminf(self,DB_ID,SD_ID):
        #select * from columns_v2 where CD_ID =(select CD_ID from sds  a where a.SD_ID='4808262') ;
        conn = pymysql.connect(host='127.0.0.1', port=3306, user='pig', passwd='123456', db='hive', charset='utf8')
        import time
        time.sleep(0.01)
        courser=conn.cursor()
        sql=  "select DISTINCT b.TBL_ID,b.TBL_NAME,a.* from columns_v2 a,tbls b,sds  c where b.DB_ID =%s  and b.SD_ID = c.SD_ID and c.CD_ID=a.CD_ID and b.TBL_ID=%s ORDER  by a.INTEGER_IDX"%(DB_ID,SD_ID)
        courser.execute(sql)
        value=courser.fetchall()
        courser.close()
        conn.close()
        return  value

    def getSEDInof(self,DB_ID,table_ID):
        sql="select  DISTINCT SLIB,b.tbl_name from SERDES a,tbls b,sds c  where b.SD_ID=c.SD_ID and a.SERDE_ID = c.SERDE_ID  and b.DB_ID=%s and b.TBL_ID=%s"%(DB_ID,table_ID)
        import time
        time.sleep(0.01)
        conn = pymysql.connect(host='127.0.0.1', port=3306, user='pig', passwd='123456', db='hive', charset='utf8')
        courser=conn.cursor()
        courser.execute(sql)
        value=courser.fetchall()
        courser.close()
        conn.close()
        return value

    def getFilesTermINATED(self,DB_ID,table_id):
        long_sql="""
SELECT DISTINCT PARAM_VALUE,d.TBL_NAME
FROM serde_params a, serdes b, sds c,tbls d
WHERE (a.PARAM_KEY = 'field.delim' or a.PARAM_KEY ='serialization.format')
AND a.SERDE_ID = b.SERDE_ID
AND c.SERDE_ID = b.SERDE_ID
AND d.SD_ID = c.SD_ID
AND d.DB_ID = %s
AND d.TBL_ID =%s;"""
        sql=long_sql%(DB_ID,table_id)
        import time
        time.sleep(0.02)
        conn = pymysql.connect(host='127.0.0.1', port=3306, user='pig', passwd='123456', db='hive', charset='utf8')
        courser=conn.cursor()
        courser.execute(sql)
        value=courser.fetchall()
        courser.close()
        conn.close()
        return value

    def getInputFormat(self,DB_ID,table_id):
        long_sql="select b.TBL_NAME, a.INPUT_FORMAT,a.OUTPUT_FORMAT,a.LOCATION   from sds a ,tbls b where a.SD_ID=b.SD_ID and b.DB_ID=%s and b.TBL_ID=%s "
        sql=long_sql%(DB_ID,table_id)
        import time
        time.sleep(0.01)
        conn = pymysql.connect(host='127.0.0.1', port=3306, user='pig', passwd='123456', db='hive', charset='utf8')
        courser=conn.cursor()
        courser.execute(sql)
        value=courser.fetchall()
        courser.close()
        conn.close()
        return value

    def delpartitions(self,sds_id):
        self.getClouminf(self,sds_id)
        return None

    def delnopartitions(self,sds_id):
        return None

    def get_create_table(self,table_name):
        LONGSQL="""DROP TABLE IF EXISTS %s;
CREATE TABLE %s ("""%(table_name,table_name)
        return LONGSQL

    def get_create_cloume(self,columns_list):
        sql=""
        len_col=len(columns_list)
        tmp_item=1
        for column in   columns_list:
            if tmp_item == len_col:
                if column[3] == None:
                    sql=sql+"  `"+column[4]+"` " + column[5]+")"
                else:
                    sql=sql+"  `"+column[4]+"` " + column[5]+" COMMENT '"+ column[3]+"')"
            else:
                if column[3] == None:
                    sql=sql+"  `"+column[4]+"` " + column[5]+",\n"
                else:
                    sql=sql+"  `"+column[4]+"` " + column[5]+" COMMENT '"+ column[3]+"',\n"

            tmp_item=tmp_item+1
        return  sql

    def get_partion_item(self,partitionlist):
        sql="PARTITIONED BY (\n"
        len_col=len(partitionlist)
        tmp_item=1
        #print partitionlist
        for column in   partitionlist:
            if tmp_item == len_col:
                if column[1] == None:
                    sql=sql+"  `"+column[2]+"` " + column[3]+")"
                else:
                    sql=sql+"  `"+column[2]+"` " + column[3]+" COMMENT '"+ column[1]+"')"
            else:
                if column[1] == None:
                    sql=sql+"  `"+column[2]+"` " + column[3]+",\n"
                else:
                    sql=sql+"  `"+column[2]+"` " + column[3]+" COMMENT '"+ column[1]+"',\n"

            tmp_item=tmp_item+1
        return  sql

    def get_filerow_file_data(self,serintf,clome_file,inoutFormat,table_name):
        st1r='\t'
        last_sql=""
        if serintf[0][0] == 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe':
            if str(clome_file[0][0]) == str(1):
                last_sql="""ROW FORMAT SERDE
 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
 '%s'
OUTPUTFORMAT
 '%s';
 """%(inoutFormat[0][1],inoutFormat[0][2])
                return last_sql
            else:
                if clome_file[0][0] == u'\t':
                    st1r=r'\t'
                elif clome_file[0][0] == u',':
                    st1r=","
                elif clome_file[0][0] == u'$':
                    st1r=r"$"
                elif clome_file[0][0] == u'\005':
                    st1r=r'\\005'
                elif clome_file[0][0] == r'|':
                    st1r="|"
                last_sql="""ROW FORMAT DELIMITED
 FIELDS TREMINATED BY '%s'
 LINES TREMINATED BY '\\n'
 STORED AS INPUTFORMAT
 '%s'
OUTPUTFORMAT
 '%s';
"""%(st1r,inoutFormat[0][1],inoutFormat[0][2])
                return last_sql
        else:
            last_sql="""ROW FORMAT DELIMITED
 FIELDS TREMINATED BY '\t'
 LINES TERMINATED BY '\\n'
 STORED AS INPUTFORMAT
 '%s'
OUTPUTFORMAT
 '%s';
"""%(inoutFormat[0][1],inoutFormat[0][2])
            sql_alter="alter table %s  SET SERDE 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'; "%(table_name)
            last_sql=last_sql+"\n"+sql_alter
            return last_sql



if __name__ == "__main__":
    dbs_id="351"
    table_list = GetTableinfo().getTableList(dbs_id)
    print table_list
    for table in table_list:
        table_name = table[7]
        table_tbl_id= table[0]
        sds_id=table[6]
        ##得到表的所有字段
        columns=GetTableinfo().getClouminf(dbs_id,table_tbl_id)
        #得到是否是分区表
        partitionsvalue=GetTableinfo().ispartitions(table_tbl_id)
        if len(partitionsvalue) != 0 :
            #不是分区表
            columns_len = len(columns)
            partitionsvalue_len= len(partitionsvalue)
            get_len=columns_len-partitionsvalue_len
            #表字段
            columns_list=columns[0:get_len+1]
            #分区字段
            partitionlist=partitionsvalue
            #ser字段
            serintf = GetTableinfo().getSEDInof(dbs_id,table_tbl_id)
            #得到行文分隔符,和列分隔符
            hive_line='\n'
            #列分隔符
            clome_file= GetTableinfo().getFilesTermINATED(dbs_id,table_tbl_id)
            #输入,输出格式
            inoutFormat= GetTableinfo().getInputFormat(dbs_id,table_tbl_id)
            create_table_sql=GetTableinfo().get_create_table(table_name)
            create_table_colume=GetTableinfo().get_create_cloume(columns_list)
            create_talbe_partion=GetTableinfo().get_partion_item(partitionlist)
            #print create_table_sql+' \n'+create_table_colume+'\n'+create_talbe_partion
            last_sql= GetTableinfo().get_filerow_file_data(serintf,clome_file,inoutFormat,table_name)
            myfilesql=""
            try:
                myfilesql= create_table_sql+' \n'+create_table_colume+'\n'+create_talbe_partion+'\n'+last_sql
                with open("c:\\base.txt","a+") as myfile:
                     myfile.write(str(myfilesql).decode("utf-8"))
            finally:
                pass

        else:
            #print columns_list
            #表字段
            columns_list=columns
            #ser字段
            serintf=GetTableinfo().getSEDInof(dbs_id,table_tbl_id)
            print serintf
            if serintf == ():
                continue
            #得到行文分隔符,和列分隔符
            hive_line='\n'
            #列分隔符
            clome_file= GetTableinfo().getFilesTermINATED(dbs_id,table_tbl_id)
            #输入,输出格式
            inoutFormat= GetTableinfo().getInputFormat(dbs_id,table_tbl_id)
            create_table_sql=GetTableinfo().get_create_table(table_name)
            create_table_colume=GetTableinfo().get_create_cloume(columns_list)
            last_sql=GetTableinfo().get_filerow_file_data(serintf,clome_file,inoutFormat,table_name)
            myfilesql=""
            try:
                myfilesql= "\n"+create_table_sql+'\n'+create_table_colume+'\n'+last_sql
                with open("c:\\base.txt","a+") as myfile:
                    myfile.write(myfilesql.decode("utf-8"))
            #myfilesql= create_table_sql+' \n'+create_table_colume+'\n'+create_talbe_partion+'\n'+last_sql

            finally:
                pass



















  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值