python脚本:Teradata 转换成bigquery的DDL

python脚本
用于Teradata 装换成bigquery的DDL
包含了varchar、char、bigint、date、decimal转换
例如:
TD:
CREATE MULTISET TABLE XXXX.AAA ,FALLBACK,
NO BEFORE JOURNAL,
NO AFTER JOURNAL,
CHECKSUM=DEFAULT,
DEFAULT MERGEBLOCKRATO,
MAP=TD_MAP1
(
ID CHAR(5) CHARACTER SET LATIN CASESPCIFIC NOT NULL,
AMOUNT DECIMAL(23,3) COMPRESS 0.000,
NUM BIGINT NOT NULL,
DAY DATE FORMAT ‘yyyy-mm-dd’ NOT NULL,
NAME VARCHAR(250) CHARACTER SET LATIN CASESPECIFIC NOT NULL,
FLAG CHAR(1) CHARACTER SET LATIN CASESPECIFIC NOT NULL DEFAULT ‘Y’
)
PRIMARY INDEX(‘ID’);

BQ:
CREATE OR REPLACE TABLE YYYY.AAA(
ID STRING NOT NULL,
AMOUNT NUMERIC,
NUM INT64 NOT NULL,
DAY DATE FORMAT 'yyyy-m
DAY DATE NOT NULL,
NAME NOT NULL,
FLAG NOT NULL
)
;


import paramiko
import re
import csv
import time
import numpy as np
import pandas as pd
import os

def sql_del(TD_DDL,data)
#schema_td为TD原数据库名,schema_bq为BQ数据库名
	schema_td='xxxx.'
	schema_bq='yyyy.'
	BQ_DDL=re.sub(r'(?ms),.*?\($',TD_DDL).replace('MULTISET','OR REPLACE').replace(schema_td,schema_bq)

	index=re.findall(r'(?ms)PRIMARY INDEX.*?;$'|UNIQUE PRIMARY INDEX.*?;$',BQ_DDL)
	
	BQ_DDL=re.sub(r'(?ms)PRIMARY INDEX.*?;$'|UNIQUE PRIMARY INDEX.*?;$',';',BQ_DDL)
	
	table=re.findall(r'(?ms)XXXX\..*?\($',BQ_DDL)
	table_name=str(table).replace('(','').replace('[','').replace(']','').replace('\'','')
	
	if len(index)!=0:
		mark={}
		mark['table_name']=table_name
		mark['default']='PRI'
		mark['index']=str(index).replace('[','').replace(']','').replace('\'','')
		data.append(mark)
	
	#处理VARCHAR和CHAR类型
	com=re.findall(r'(?ms)VARCHAR.*?\,$|CHAR.*?\,$|VARCHAR.*?\)$|CHAR.*?\)$',BQ_DDL)
	
	for in in com:
		mark={}
		if i.find('DEFAULT')!=-1:
			mark['table_name']=table_name
			mark['default']=i
			mark['index']=str(index).replace('[','').replace(']','').replace('\'','')
			data.append(mark)
		if i[-1]==')':
			if i.find('NOT NULL')!=-1:
				BQ_DDL=BQ_DDL.replace(i,'STRING NOT NULL)')
			else:
				BQ_DDL=BQ_DDL.replace(i,'STRING)')
		else:
			if i.find('NOT NULL')!=-1:
				BQ_DDL=BQ_DDL.replace(i,'STRING NOT NULL,')
			else:
				BQ_DDL=BQ_DDL.replace(i,'STRING,')	
			
	#处理BIGINT类型
	com=re.findall(r'(?ms)BIGINT.*?\,$|BIGINT.*?\)$',BQ_DDL)
	
	for in in com:
		mark={}
		if i.find('DEFAULT')!=-1:
			mark['table_name']=table_name
			mark['default']=i
			mark['index']=str(index).replace('[','').replace(']','').replace('\'','')
			data.append(mark)
		if i[-1]==')':
			if i.find('NOT NULL')!=-1:
				BQ_DDL=BQ_DDL.replace(i,'INT64 NOT NULL)')
			else:
				BQ_DDL=BQ_DDL.replace(i,'INT64 )')
		else:
			if i.find('NOT NULL')!=-1:
				BQ_DDL=BQ_DDL.replace(i,'INT64 NOT NULL,')
			else:
				BQ_DDL=BQ_DDL.replace(i,'INT64 ,')	
	#处理DECIMAL类型
	com=re.findall(r'(?ms)DECIMAL.*?\,$|DECIMAL.*?\)$',BQ_DDL)
	
	for in in com:
		mark={}
		if i.find('DEFAULT')!=-1:
			mark['table_name']=table_name
			mark['default']=i
			mark['index']=str(index).replace('[','').replace(']','').replace('\'','')
			data.append(mark)
		if i[-1]==')':
			if i.find('NOT NULL')!=-1:
				BQ_DDL=BQ_DDL.replace(i,'NUMERIC NOT NULL)')
			else:
				BQ_DDL=BQ_DDL.replace(i,'NUMERIC )')
		else:
			if i.find('NOT NULL')!=-1:
				BQ_DDL=BQ_DDL.replace(i,'NUMERIC NOT NULL,')
			else:
				BQ_DDL=BQ_DDL.replace(i,'NUMERIC ,')	
	#处理DATE类型
	com=re.findall(r'(?ms)DATE.*?\,$|DATE.*?\)$',BQ_DDL)
	
	for in in com:
		mark={}
		if i.find('DEFAULT')!=-1:
			mark['table_name']=table_name
			mark['default']=i
			mark['index']=str(index).replace('[','').replace(']','').replace('\'','')
			data.append(mark)
		if i[-1]==')':
			if i.find('NOT NULL')!=-1:
				BQ_DDL=BQ_DDL.replace(i,'DATE NOT NULL)')
			else:
				BQ_DDL=BQ_DDL.replace(i,'DATE)')
		else:
			if i.find('NOT NULL')!=-1:
				BQ_DDL=BQ_DDL.replace(i,'DATE NOT NULL,')
			else:
				BQ_DDL=BQ_DDL.replace(i,'DATE,')

	return BQ_DDL,table_name,data



#############main################
o=os.walk(r'要更换的TD文件的路径')
data=[]
#循环读取Windows制定目录下文件,并根据sql_del处理写入到存放路径
for path,dir_list,file_name in o:
	 for i in file_name:
	 	reader=open(path+i,'r')
	 	TD_DDL=reader.read()
	 	BQ_DDL,table_name,data=sql_del(TD_DDL,data)
	 	reader.close()
		
		fo=open(r'生成BQ的DDL文件的存放路径')
		fo.write(BQ_DDL)
		fo.close()
		default_mark=pd.DataFrame(data,columns=['table_name','default','index'])
#记录TD中原有的table对应的default默认值和index`在这里插入代码片`
default_mark.to_csv(r'把没有转换的default和index记录在csv文件中,这里为csv文件保存路径',index=False)	

以上代码类型转换部分还可以优化,暂时先记录这些,后期再做优化

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值