比如我们从mysql 的数据库里面有几百张表我们需要改变结构放入到oracle库里面,这个时候建表语句不一样,有些我们需要加系统字段,有些需要加分区
如图,表,字段,描述都有
读取这个excle 进行生成建表语句:
上代码:
import pandas as pd
import openpyxl
# 读取表,和清洗异常情况
def get_df(file_name):
all_df = pd.read_excel(file_name)
import re
def get_column_type(x):
if x == 'nan' or x == 'NaN':
return ''
else:
return "(" + x.replace('|', ',') + ")"
all_df['PRECISION'] = all_df['PRECISION'].apply(lambda x: get_column_type(str(x)))
all_df['COLUMN_COMMENT'] = all_df['COLUMN_COMMENT'].apply(
lambda x: '' if str(x) == 'nan' or str(x) == 'NaN' else x.replace('\r\n', ''))
all_df['TABLE_COMMENT'] = all_df['TABLE_COMMENT'].apply(lambda x: '' if str(x) == 'nan' or str(x) == 'NaN' else x)
all_df['ISNULLABLE'] = all_df['ISNULLABLE'].apply(lambda x: 'NOT NULL' if x == 'N' else 'NULL')
return all_df;
# 生成数据的格式
def gen_sql(df, tablename, new_col_str):
df = df.reset_index(drop=True)
# 得到上次生成到那行了,pre 用于标记写到哪里了。
pre = ws.max_row + 1
ws.cell(row=pre + 1, column=1).value = "begin"
ws.cell(row=pre + 2, column=1).value = "Execute immediate 'drop table " + tablename + "';"
ws.cell(row=pre + 3, column=1).value = "exception when others then"
ws.cell(row=pre + 4, column=1).value = "null;"
ws.cell(row=pre + 5, column=1).value = "end;"
ws.cell(row=pre + 6, column=1).value = "/"
ws.cell(row=pre + 8, column=1).value = "/* Create table " + tablename + " */"
ws.cell(row=pre + 9, column=1).value = "CREATE TABLE " + tablename + " ("
for i in range(0, len(df)):
base_row = pre + 10
new_row = pre + 10 + i
# 如果是第一个行,前面不需要逗号
if i == 0:
ws.cell(row=new_row, column=1).value = " " + str(df['COLUMN_NAME'][i]) + " " + str(
df['ORACLE_TYPE'][i]) + "" + str(df["PRECISION"][i]) + " " + str(df['ISNULLABLE'][i])
else:
ws.cell(row=new_row, column=1).value = ", " + str(df['COLUMN_NAME'][i]) + " " + str(
df['ORACLE_TYPE'][i]) + "" + str(df["PRECISION"][i]) + " " + str(df['ISNULLABLE'][i])
ws.cell(row=ws.max_row + 1, column=1).value = new_col_str
new_row = pre + df.shape[0] + 11
if df[df['ISPK'].notnull()].shape[0] == 1:
pk_id_str = str(tuple(df[df['ISPK'].notnull()]['COLUMN_NAME'])).replace("',", '')
pk_id_str = pk_id_str.replace("'", '')
ws.cell(row=new_row, column=1).value = ", CONSTRAINT PK_" + tablename + " PRIMARY KEY "
ws.cell(row=new_row + 1, column=1).value = pk_id_str
elif df[df['ISPK'].notnull()].shape[0] > 1:
pk_id_str = str(tuple(df[df['ISPK'].notnull()]['COLUMN_NAME'])).replace("'", '')
ws.cell(row=new_row, column=1).value = ", CONSTRAINT PK_" + tablename + " PRIMARY KEY "
ws.cell(row=new_row + 1, column=1).value = pk_id_str
else:
print("没有主键")
ws.cell(row=new_row + 2, column=1).value = ")"
##加分区 如果有字段GEN_DATE 就加分区
if 'GEN_DATE' in list(df['COLUMN_NAME']):
part_row = 21
ws.cell(row=new_row + 3, column=1).value = "partition by range(GEN_DATE)"
ws.cell(row=new_row + 4, column=1).value = "("
ws.cell(row=new_row + 5,
column=1).value = " partition P_202001 values less than (to_date('2020-02-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 6,
column=1).value = " partition P_202002 values less than (to_date('2020-03-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 7,
column=1).value = " partition P_202003 values less than (to_date('2020-04-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 8,
column=1).value = " partition P_202004 values less than (to_date('2020-05-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 9,
column=1).value = " partition P_202005 values less than (to_date('2020-06-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 10,
column=1).value = " partition P_202006 values less than (to_date('2020-07-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 11,
column=1).value = " partition P_202007 values less than (to_date('2020-08-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 12,
column=1).value = " partition P_202008 values less than (to_date('2020-09-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 13,
column=1).value = " partition P_202009 values less than (to_date('2020-10-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 14,
column=1).value = " partition P_202010 values less than (to_date('2020-11-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 15,
column=1).value = " partition P_202011 values less than (to_date('2020-12-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 16,
column=1).value = " partition P_202012 values less than (to_date('2021-01-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 17,
column=1).value = " partition P_202101 values less than (to_date('2021-02-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 18,
column=1).value = " partition P_202102 values less than (to_date('2021-03-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 19,
column=1).value = " partition P_202103 values less than (to_date('2021-04-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 20,
column=1).value = " partition P_202104 values less than (to_date('2021-05-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 21,
column=1).value = " partition P_202105 values less than (to_date('2021-06-01','yyyy-mm-dd')) ,"
ws.cell(row=new_row + 22,
column=1).value = " partition P_202106 values less than (to_date('2021-07-01','yyyy-mm-dd'))"
ws.cell(row=new_row + 23, column=1).value = ");"
else:
part_row = 0
ws.cell(row=part_row + new_row + 3, column=1).value = ";"
##向后退行
ws.cell(row=part_row + new_row + 4, column=1).value = "--Table extended properties..."
ws.cell(row=part_row + new_row + 5, column=1).value = "comment on table " + tablename + " is '" + \
df['TABLE_COMMENT'].iloc[0] + "';"
ws.cell(row=part_row + new_row + 7, column=1).value = "--Column extended properties;"
for i in range(0, len(df)):
base_row = part_row + pre + df.shape[0] + 11 + 8
new_row = base_row + i
ws.cell(row=new_row, column=1).value = "comment on column " + tablename + "." + df['COLUMN_NAME'].iloc[
i] + " is '" + df['COLUMN_COMMENT'].iloc[i] + "';"
if __name__ == '__main__':
# 读取要生成的sql的表结构@param1 文件名 return 读到的所有数据
all_df = get_df('老预定表结构.xlsx')
# @param1 要写入的模板
wb = openpyxl.load_workbook(r'work_tmplate.xlsx')
# 都写在一个sheet 里面
ws = wb.worksheets[0]
# 加固定顶一行:
new_col_str = ", capxtimestamp TIMESTAMP(6) default systimestamp"
# 所有的表名
table_list = list(all_df.drop_duplicates(['TABLE_NAME'])['TABLE_NAME'])
# 循环生成建表语句
for i in range(0, len(table_list)):
df = all_df[all_df['TABLE_NAME'] == table_list[i]]
gen_sql(df, table_list[i], new_col_str)
# 生产到脚本全都生成到一个excel的sheet里面,如果直接生成.sql 文件的话,文件格式不好控制
# 复制第一列即可
wb.save('SQL8.xlsx')