python根据excel生成Hive_ddl

 1,背景简介         

最近新入职了一家公司,负责数据仓库的ODS开发,主要上游为MySQL、Oracle、SQL Server等数据库。获取DDL后要在Hive上建表,再将数据从上游系统导入到ODS。手工生成建表语句有些费时费力,想着用Excel模板实现批量生成建表语句,说干就干。

2,excel模版配置

excel模版如下(sheet页命名:Data_Dic;第1行为表头,数据从第2行开始;A列为表名,B列为表中文名,C列为字段名,D列为字段类型,E列为字段中文名;同一张表的字段需连续排列):

3,python代码块

import openpyxl
import os,re


def to_hive_type(type):
    """Map a source-database column type to the Hive type used in the DDL.

    The input is stripped and lower-cased before matching, so ``VARCHAR2(20)``
    and `` varchar2(20) `` are handled the same way as ``varchar2(20)``.

    Rules, applied in order (all but the first two are prefix matches):

    * ``number(...)`` / ``numeric(...)``  -> ``decimal(...)`` (arguments kept)
    * bare ``number`` / ``numeric``       -> ``decimal(32,16)``
    * varchar/char/text/tinytext/longtext/blob/clob/binary -> ``string``
    * timestamp/date/datetime             -> ``timestamp``
    * int/tinyint/smallint                -> ``int``
    * double/float                        -> ``double``
    * anything else is returned (normalised) unchanged, e.g. ``bigint``.

    Args:
        type: Source type as a string. The name shadows the builtin
            ``type``; it is kept so keyword callers keep working.

    Returns:
        The Hive type name as a lower-case string.
    """
    # Data dictionaries exported from Oracle / SQL Server usually carry
    # upper-case type names; normalise first so the rules below apply.
    source_type = type.strip().lower()

    # number(p,s) or numeric(p,s) -> decimal(p,s)
    source_type = re.sub(r'number[(]|numeric[(]', 'decimal(', source_type)

    # number or numeric without precision -> fixed default precision
    if source_type in ('number', 'numeric'):
        return 'decimal(32,16)'

    prefix_rules = (
        (r'varchar|char|text|tinytext|longtext|blob|clob|binary', 'string'),
        (r'timestamp|date|datetime', 'timestamp'),
        (r'int|tinyint|smallint', 'int'),
        (r'double|float', 'double'),
    )
    for pattern, hive_type in prefix_rules:
        # re.match anchors at the start only, so 'varchar(20)' matches
        # 'varchar' while 'bigint' does not match 'int'.
        if re.match(pattern, source_type):
            return hive_type
    return source_type

def excle_2_hiveddl(datebase,excel_file,sqlfile):
    """Generate Hive DROP/CREATE TABLE statements from an Excel data dictionary.

    Reads the ``Data_Dic`` worksheet of *excel_file*. Row 1 is treated as a
    header; every following row describes one column:

    * A: table name (lower-cased and prefixed with ``datebase.``)
    * B: table comment
    * C: column name
    * D: source column type (converted with ``to_hive_type``)
    * E: column comment

    Consecutive rows with the same value in column A form one table. Rows
    whose table-name cell is empty are skipped. Each generated fragment is
    printed and written to *sqlfile*, which is overwritten if it exists.

    Args:
        datebase: Hive database name used as the table prefix.
        excel_file: Path of the ``.xlsx`` workbook to read.
        sqlfile: Path of the SQL file to write (UTF-8).
    """
    wb = openpyxl.load_workbook(f'{excel_file}')
    sqlfile = f'{sqlfile}'
    ws = wb['Data_Dic']  # worksheet name is fixed by the template

    # Collect the data rows first. ws.max_row may include blank rows; they
    # have no table name and would otherwise fail on ``None.lower()``.
    data_rows = []
    for row in range(2, ws.max_row + 1):  # data starts at row 2
        raw_table = ws['A' + str(row)].value
        if raw_table is None or not str(raw_table).strip():
            continue
        data_rows.append((
            raw_table,
            ws['B' + str(row)].value,
            ws['C' + str(row)].value,
            ws['D' + str(row)].value,
            ws['E' + str(row)].value,
        ))

    last_index = len(data_rows) - 1
    # Opening once in 'w' mode replaces the old remove-then-append-per-row
    # sequence: the file is truncated up front and written incrementally.
    with open(sqlfile, 'w', encoding='utf-8') as sq:
        for idx, (raw_table, table_cname, col_name, src_type, col_cname) in enumerate(data_rows):
            # A table starts/ends where the neighbouring row names another table.
            starts_table = idx == 0 or data_rows[idx - 1][0] != raw_table
            ends_table = idx == last_index or data_rows[idx + 1][0] != raw_table

            table_name = f'{datebase}.' + str(raw_table).lower()
            col_type = to_hive_type(src_type)
            col_comment = f"comment '{col_cname}'"

            sql = ''
            if starts_table:
                sql += '\nDROP TABLE IF EXISTS %s; \nCREATE TABLE %s\n(\n' % (table_name, table_name)

            if ends_table:
                # Last column: close the column list, then add the table options.
                # NOTE(review): Hive's CREATE TABLE syntax spells the partition
                # clause ``PARTITIONED BY (col type)``; ``partition(dt)`` is
                # emitted unchanged here - confirm against the target engine.
                sql += '%s %s %s \n )\n' % (col_name, col_type, col_comment)
                sql += f"comment '{table_cname}' \r\n partition(dt)\r\n stored as parquet; \r\n"
            else:
                # More columns follow: the trailing comma leads the next line.
                sql += '%s %s %s \n,' % (col_name, col_type, col_comment)

            print(sql)
            sq.write(sql)



if __name__ == '__main__':
    # Script entry point: read table.xlsx from the working directory and
    # write the generated DDL for database 'odss' next to it.
    work_pash = '/Users/saber/Documents/python/excel/File'
    excle_2_hiveddl(
        'odss',
        work_pash + '/table.xlsx',
        work_pash + '/SQL_DDL.sql',
    )

4,生成建表语句预览 

DROP TABLE IF EXISTS odss.cust; 
CREATE TABLE odss.cust
(
id bigint comment '主键' 
,name string comment '姓名' 
,age int comment '年龄' 
,adress string comment '住址' 
,sex string comment '性别' 
,m_aml decimal(16,4) comment '月收入' 
 )
comment '个人信息' 
 partition(dt)
 stored as parquet; 

DROP TABLE IF EXISTS odss.com; 
CREATE TABLE odss.com
(
id bigint comment '主键' 
,name string comment '姓名' 
,scale decimal(19,4) comment '规模' 
,adress string comment '地址' 
 )
comment '公司信息' 
 partition(dt)
 stored as parquet; 

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值