Python 根据字段自动生成建表语句

比如我们从mysql 的数据库里面有几百张表我们需要改变结构放入到oracle库里面,这个时候建表语句不一样,有些我们需要加系统字段,有些需要加分区

如图,表,字段,描述都有

读取这个excle 进行生成建表语句:

上代码:

import pandas as pd
import openpyxl


# 读取表,和清洗异常情况
def get_df(file_name):
    all_df = pd.read_excel(file_name)
    import re
    def get_column_type(x):
        if x == 'nan' or x == 'NaN':
            return ''

        else:
            return "(" + x.replace('|', ',') + ")"

    all_df['PRECISION'] = all_df['PRECISION'].apply(lambda x: get_column_type(str(x)))
    all_df['COLUMN_COMMENT'] = all_df['COLUMN_COMMENT'].apply(
        lambda x: '' if str(x) == 'nan' or str(x) == 'NaN' else x.replace('\r\n', ''))
    all_df['TABLE_COMMENT'] = all_df['TABLE_COMMENT'].apply(lambda x: '' if str(x) == 'nan' or str(x) == 'NaN' else x)
    all_df['ISNULLABLE'] = all_df['ISNULLABLE'].apply(lambda x: 'NOT NULL' if x == 'N' else 'NULL')
    return all_df;


# 生成数据的格式
def gen_sql(df, tablename, new_col_str):
    df = df.reset_index(drop=True)
    # 得到上次生成到那行了,pre 用于标记写到哪里了。
    pre = ws.max_row + 1

    ws.cell(row=pre + 1, column=1).value = "begin"

    ws.cell(row=pre + 2, column=1).value = "Execute immediate 'drop table " + tablename + "';"
    ws.cell(row=pre + 3, column=1).value = "exception when others then"
    ws.cell(row=pre + 4, column=1).value = "null;"
    ws.cell(row=pre + 5, column=1).value = "end;"
    ws.cell(row=pre + 6, column=1).value = "/"
    ws.cell(row=pre + 8, column=1).value = "/* Create table " + tablename + " */"
    ws.cell(row=pre + 9, column=1).value = "CREATE TABLE " + tablename + " ("
    for i in range(0, len(df)):
        base_row = pre + 10
        new_row = pre + 10 + i
        # 如果是第一个行,前面不需要逗号
        if i == 0:
            ws.cell(row=new_row, column=1).value = "   " + str(df['COLUMN_NAME'][i]) + " " + str(
                df['ORACLE_TYPE'][i]) + "" + str(df["PRECISION"][i]) + " " + str(df['ISNULLABLE'][i])
        else:
            ws.cell(row=new_row, column=1).value = ",   " + str(df['COLUMN_NAME'][i]) + " " + str(
                df['ORACLE_TYPE'][i]) + "" + str(df["PRECISION"][i]) + " " + str(df['ISNULLABLE'][i])

    ws.cell(row=ws.max_row + 1, column=1).value = new_col_str

    new_row = pre + df.shape[0] + 11

    if df[df['ISPK'].notnull()].shape[0] == 1:
        pk_id_str = str(tuple(df[df['ISPK'].notnull()]['COLUMN_NAME'])).replace("',", '')
        pk_id_str = pk_id_str.replace("'", '')
        ws.cell(row=new_row, column=1).value = ", CONSTRAINT PK_" + tablename + " PRIMARY KEY "
        ws.cell(row=new_row + 1, column=1).value = pk_id_str
    elif df[df['ISPK'].notnull()].shape[0] > 1:

        pk_id_str = str(tuple(df[df['ISPK'].notnull()]['COLUMN_NAME'])).replace("'", '')
        ws.cell(row=new_row, column=1).value = ", CONSTRAINT PK_" + tablename + " PRIMARY KEY "
        ws.cell(row=new_row + 1, column=1).value = pk_id_str
    else:
        print("没有主键")

    ws.cell(row=new_row + 2, column=1).value = ")"
    ##加分区 如果有字段GEN_DATE 就加分区
    if 'GEN_DATE' in list(df['COLUMN_NAME']):
        part_row = 21
        ws.cell(row=new_row + 3, column=1).value = "partition by range(GEN_DATE)"
        ws.cell(row=new_row + 4, column=1).value = "("
        ws.cell(row=new_row + 5,
                column=1).value = "  partition P_202001 values less than (to_date('2020-02-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 6,
                column=1).value = "  partition P_202002 values less than (to_date('2020-03-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 7,
                column=1).value = "  partition P_202003 values less than (to_date('2020-04-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 8,
                column=1).value = "  partition P_202004 values less than (to_date('2020-05-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 9,
                column=1).value = "  partition P_202005 values less than (to_date('2020-06-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 10,
                column=1).value = "  partition P_202006 values less than (to_date('2020-07-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 11,
                column=1).value = "  partition P_202007 values less than (to_date('2020-08-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 12,
                column=1).value = "  partition P_202008 values less than (to_date('2020-09-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 13,
                column=1).value = "  partition P_202009 values less than (to_date('2020-10-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 14,
                column=1).value = "  partition P_202010 values less than (to_date('2020-11-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 15,
                column=1).value = "  partition P_202011 values less than (to_date('2020-12-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 16,
                column=1).value = "  partition P_202012 values less than (to_date('2021-01-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 17,
                column=1).value = "  partition P_202101 values less than (to_date('2021-02-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 18,
                column=1).value = "  partition P_202102 values less than (to_date('2021-03-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 19,
                column=1).value = "  partition P_202103 values less than (to_date('2021-04-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 20,
                column=1).value = "  partition P_202104 values less than (to_date('2021-05-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 21,
                column=1).value = "  partition P_202105 values less than (to_date('2021-06-01','yyyy-mm-dd')) ,"
        ws.cell(row=new_row + 22,
                column=1).value = "  partition P_202106 values less than (to_date('2021-07-01','yyyy-mm-dd'))"
        ws.cell(row=new_row + 23, column=1).value = ");"
    else:
        part_row = 0

        ws.cell(row=part_row + new_row + 3, column=1).value = ";"

    ##向后退行
    ws.cell(row=part_row + new_row + 4, column=1).value = "--Table extended properties..."
    ws.cell(row=part_row + new_row + 5, column=1).value = "comment on table " + tablename + " is '" + \
                                                          df['TABLE_COMMENT'].iloc[0] + "';"
    ws.cell(row=part_row + new_row + 7, column=1).value = "--Column extended properties;"

    for i in range(0, len(df)):
        base_row = part_row + pre + df.shape[0] + 11 + 8
        new_row = base_row + i
        ws.cell(row=new_row, column=1).value = "comment on column " + tablename + "." + df['COLUMN_NAME'].iloc[
            i] + " is '" + df['COLUMN_COMMENT'].iloc[i] + "';"


if __name__ == '__main__':
    # 读取要生成的sql的表结构@param1 文件名 return  读到的所有数据
    all_df = get_df('老预定表结构.xlsx')
    # @param1 要写入的模板
    wb = openpyxl.load_workbook(r'work_tmplate.xlsx')
    # 都写在一个sheet 里面
    ws = wb.worksheets[0]
    # 加固定顶一行:
    new_col_str = ",   capxtimestamp  TIMESTAMP(6) default systimestamp"
    # 所有的表名
    table_list = list(all_df.drop_duplicates(['TABLE_NAME'])['TABLE_NAME'])
    # 循环生成建表语句
    for i in range(0, len(table_list)):
        df = all_df[all_df['TABLE_NAME'] == table_list[i]]

        gen_sql(df, table_list[i], new_col_str)
    # 生产到脚本全都生成到一个excel的sheet里面,如果直接生成.sql 文件的话,文件格式不好控制
    # 复制第一列即可
    wb.save('SQL8.xlsx')

 

### 回答1: 好的,首先你需要提供这些字段的信息,例如字段名称、数据类型、是否非空等。 假设你有以下字段信息: - id:数字类型,主键,自增长 - name:字符串类型,非空 - age:数字类型,非空 - gender:字符串类型,可以为空 - address:字符串类型,可以为空 则可以使用以下 SQL 语句来创建名为 `person` 的表: ``` CREATE TABLE person ( id serial PRIMARY KEY, name VARCHAR NOT NULL, age INTEGER NOT NULL, gender VARCHAR, address VARCHAR ); ``` 这条语句会创建一个名为 `person` 的表,其中有五个字段,分别是 `id`、`name`、`age`、`gender` 和 `address`。`id` 字段是数字类型,并且是主键,设置为自增长。`name` 和 `age` 字段是字符串类型,并且设置为非空。`gender` 和 `address` 字段是字符串类型,并且可以为空。 请注意,这只是一个示例,实际上可能会有更多的字段和更复杂的表结构。 希望这对你有帮助! ### 回答2: 生成 PostgreSQL建表语句需要根据给定的字段进行处理。以下是一个示例的程序脚本,用于根据给定的字段生成 PostgreSQL建表语句: ```python def generate_create_table_statement(fields): statement = "CREATE TABLE table_name(\n" for field in fields: statement += f"{field['name']} {field['type']}" if field['length']: statement += f"({field['length']})" if field['primary_key']: statement += " PRIMARY KEY" if not field['nullable']: statement += " NOT NULL" statement += ",\n" statement = statement.rstrip(",\n") # 移除最后一个逗号和换行符 statement += ");" return statement # 示例字段 fields = [ {"name": "id", "type": "INT", "length": None, "primary_key": True, "nullable": False}, {"name": "name", "type": "VARCHAR", "length": 50, "primary_key": False, "nullable": True}, {"name": "age", "type": "INT", "length": None, "primary_key": False, "nullable": True} ] create_table_statement = generate_create_table_statement(fields) print(create_table_statement) ``` 以上代码中,我们定义了一个 `generate_create_table_statement()` 函数,该函数接受字段列表作为参数,将这些字段逐个处理拼接到 `CREATE TABLE` 语句中。字段的属性包括字段名 `name`、字段类型 `type`、字段长度 `length`、是否为主键 `primary_key`,以及是否可为空 `nullable`。 使用示例字段生成的 PostgreSQL 建表语句如下所示: ``` CREATE TABLE table_name( id INT PRIMARY KEY NOT NULL, name VARCHAR(50), age INT ); ``` 你可以将字段列表替换为自己的数据,并调用函数 `generate_create_table_statement()` 来生成自定义的 PostgreSQL 建表语句。 ### 回答3: 在生成postgresql建表语句之前,需要了解这些字段的具体信息,包括字段名称、数据类型、长度、约束条件等。以下是一个例子,假设我们有以下字段信息: 字段1:用户ID(user_id),数据类型为整型(int),长度为10,主键(PRIMARY KEY); 字段2:用户名(username),数据类型为字符串型(varchar),长度为50,唯一性约束(UNIQUE); 字段3:年龄(age),数据类型为整型(int),长度为2,非空约束(NOT NULL); 字段4:性别(gender),数据类型为字符串型(varchar),长度为10; 字段5:注册日期(register_date),数据类型为日期型(date)。 根据以上字段生成postgresql建表语句如下: CREATE TABLE 表名 ( user_id INT(10) PRIMARY KEY, username VARCHAR(50) UNIQUE, age INT(2) NOT NULL, gender VARCHAR(10), register_date DATE ); 以上语句中,CREATE TABLE表示创建一张表,表名可以根据实际情况进行命名。括号内每一行表示一个字段,包括字段名称和字段的数据类型及约束条件。其中,INT表示整型,VARCHAR表示字符串型,PRIMARY KEY表示主键,UNIQUE表示唯一性约束,NOT NULL表示非空约束,DATE表示日期型。 根据具体的字段信息,可以根据上述格式进行调整和修改,以生成适合的postgresql建表语句
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值