py转换sql脚本
mysql建表语句转hive建表语句
以下mysqlsql模板,保存在mysql_table_sql.sql文件中,注意因为脚本转换会将字段注释一并转换过去,务必保证每个字段都有注释(如果不需要,在py脚本中需要去掉匹配注释的代码),除正常字段以外,只需要保留PRIMARY这一行,如果有索引或者唯一键约束的,需要去除,否则匹配时,会出现索引越界异常。
CREATE TABLE `t_park_day_report` (
`id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '主键',
`statistics_date` varchar(20) COLLATE utf8mb4_bin NOT NULL COMMENT '统计日期',
`wait_duration1` int(10) DEFAULT '0' COMMENT '9.6米上预约车辆等待时长(驶出-签到)',
`handle_duration1` int(10) DEFAULT '0' COMMENT '9.6米上装卸时长(驶出-驶入)',
`fulfillment_rate1` decimal(5,2) DEFAULT '0.00' COMMENT '9.6米上预约车辆等待履约率',
`wait_duration2` int(10) DEFAULT '0' COMMENT '9.6米下预约车辆等待时长(驶出-签到)',
`handle_duration2` int(10) DEFAULT '0' COMMENT '9.6米下装卸时长(驶出-驶入)',
`fulfillment_rate2` decimal(5,2) DEFAULT '0.00' COMMENT '9.6米下预约车辆等待履约率',
`fulfillment_rate` decimal(5,2) DEFAULT '0.00' COMMENT '预约车辆等待履约率',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=121 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT='日报统计表';
希望转换成hive的sql样式如下,创建一个同名的表并添加ods前缀,创建在ods库中,并设定分区,存储类型为orc,压缩类型是snappy。
drop table if exists ods.ods_t_park_day_report;
create table if not exists ods.ods_t_park_day_report(
id BIGINT comment "主键",
statistics_date STRING comment "统计日期",
wait_duration1 BIGINT comment "9.6米上预约车辆等待时长(驶出-签到)",
handle_duration1 BIGINT comment "9.6米上装卸时长(驶出-驶入)",
fulfillment_rate1 decimal(5,2) comment "9.6米上预约车辆等待履约率",
wait_duration2 BIGINT comment "9.6米下预约车辆等待时长(驶出-签到)",
handle_duration2 BIGINT comment "9.6米下装卸时长(驶出-驶入)",
fulfillment_rate2 decimal(5,2) comment "9.6米下预约车辆等待履约率",
fulfillment_rate decimal(5,2) comment "预约车辆等待履约率"
) comment "日报统计表" partitioned by(dt String)
stored as orc
TBLPROPERTIES ("orc.compress"="SNAPPY");
py脚本如下
# mysql转hivesql脚本 需要python环境
import re
class CreateTableSql():
def __init__(self,sql):
self.create_table_mysql=sql
def analyse_mysql_create_table(self):
"""
解析mysql建表语句:
:param create_table_sql:
:return: 字段名称、字段注释、字段类型、表注释
"""
# 字段名称
name_pattern = r"`(\w+)`"
varname_list = re.findall(name_pattern, self.create_table_mysql)
# 字段注释
comment_pattern = r"COMMENT \'(.*)\'"
varcomment_list = re.findall(comment_pattern, self.create_table_mysql)
# 字段类型
type_pattern = r"` (.*) COMMENT"
type_list = re.findall(type_pattern, self.create_table_mysql)
# 表注释
tcomment_pattern = r"COMMENT='(.*)'"
tcomment_list = re.findall(tcomment_pattern, self.create_table_mysql)
return varname_list, varcomment_list, type_list, tcomment_list
def change_type(self,cloumns_type):
'''
mysql字段类型转化成hive字段类型,这里的mysql类型和hive类型转换可以根据自己的需求修改
因为采用的是search匹配,如果存在相同字符的类型,需要将能完全匹配的类型放在前面,比如int,bigint,就需要将int类型放在bigint类型前面,否则结果生成的时候会将bigint也匹配到int
# type: mysql => hive sql
'''
if re.search(r'int', cloumns_type) != None:
return 'BIGINT'
elif re.search(r'decimal', cloumns_type) != None:
format = re.search("decimal\(.*?\)", cloumns_type)
return 'decimal' if format is None else format.group()
elif re.search(r'varchar', cloumns_type) != None:
return 'STRING'
elif re.search(r'timestamp', cloumns_type) != None:
return 'STRING'
elif re.search(r'text', cloumns_type) != None:
return 'STRING'
elif re.search(r'datetime', cloumns_type) != None:
return 'STRING'
elif re.search(r'date', cloumns_type) != None:
return 'STRING'
elif re.search(r'time', cloumns_type) != None:
return 'STRING'
elif re.search(r'char', cloumns_type) != None:
return 'STRING'
elif re.search(r'mediumint', cloumns_type) != None:
return 'INT'
elif re.search(r'tinyint', cloumns_type) != None:
return 'BIGINT'
elif re.search(r'smallint', cloumns_type) != None:
return 'BIGINT'
elif re.search(r'bigint', cloumns_type) != None:
return 'BIGINT'
elif re.search(r'double', cloumns_type) != None:
return 'DOUBLE'
elif re.search(r'float', cloumns_type) != None:
return 'DOUBLE'
else:
return cloumns_type
def mysql_to_hsql(self):
"""
生成Hive建表语句
:return:
"""
varname_list, varcomment_list, type_list, tcomment_list = self.analyse_mysql_create_table()
#test,这里写固定的开头和数据库与表的前缀
hive_sql = "drop table if exists ods.ods_"+ str(varname_list[0] + ";\n") + "create table if not exists ods.ods_" + str(varname_list[0] + "(\n")
for i in range(len(varname_list) - 2):
if i != len(varname_list) - 3:
hive_sql = hive_sql + " " + str(varname_list[i + 1]) + " " + str(
self.change_type(type_list[i])) + " comment \"" + str(varcomment_list[i]) + "\",\n"
else:
hive_sql = hive_sql + " " + str(varname_list[i + 1]) + " " + str(
self.change_type(type_list[i])) + " comment \"" + str(varcomment_list[i]) + "\"\n"
hive_sql = hive_sql + ") comment \"" + str(tcomment_list[0]) + "\""+" partitioned by(dt String) \n" + " stored as orc \n TBLPROPERTIES (\"orc.compress\"=\"SNAPPY\");"
print(hive_sql)
# 将mysql建表语句保存在这个文件中,自动读取,结果会打印到控制台
with open('../mysql_table_sql.sql', 'r', encoding='utf-8') as file:
create_table_sql = str(file.read())
cts=CreateTableSql(create_table_sql)
cts.mysql_to_hsql()
mysql建表语句创建hbase外部表
首先需要在hbase中先创建一张表,比如在命名空间dwd中创建一张表名为hbase_t_park_day_report的表,命名空间不存在的话需要先创建好。
-- 创建命名空间,注意需要单引号括起来
create_namespace 'dwd'
-- 创建表,一个列簇,压缩格式选snappy
create 'dwd:hbase_t_park_day_report', {NAME => 'cf1', VERSIONS => 1, COMPRESSION => 'snappy'}
希望生成的hive外部表如下,默认rowkey字段名称都是key,并且生成对应列与hbase表的字段映射,hbase表的命名空间和表名。
drop table if exists dwd.dwd_hbase_t_park_day_report;
create external table if not exists dwd.dwd_hbase_t_park_day_report(
key STRING comment "rowkey",
id BIGINT comment "主键",
statistics_date STRING comment "统计日期",
wait_duration1 BIGINT comment "9.6米上预约车辆等待时长(驶出-签到)",
handle_duration1 BIGINT comment "9.6米上装卸时长(驶出-驶入)",
fulfillment_rate1 decimal(5,2) comment "9.6米上预约车辆等待履约率",
wait_duration2 BIGINT comment "9.6米下预约车辆等待时长(驶出-签到)",
handle_duration2 BIGINT comment "9.6米下装卸时长(驶出-驶入)",
fulfillment_rate2 decimal(5,2) comment "9.6米下预约车辆等待履约率",
fulfillment_rate decimal(5,2) comment "预约车辆等待履约率"
) comment "日报统计表"
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:id,cf1:statistics_date,cf1:wait_duration1,cf1:handle_duration1,cf1:fulfillment_rate1,cf1:wait_duration2,cf1:handle_duration2,cf1:fulfillment_rate2,cf1:fulfillment_rate")
TBLPROPERTIES ("hbase.table.name" = "dwd:hbase_t_park_day_report", "hbase.mapred.output.outputtable" = "hbase_t_park_day_report");
py脚本如下,内容可以按照实际情况来修改
# 创建hbase hive外部表
import re
class CreateTableSql():
def __init__(self,sql):
self.create_table_mysql=sql
def analyse_mysql_create_table(self):
"""
解析mysql建表语句:
:param create_table_sql:
:return: 字段名称、字段注释、字段类型、表注释
"""
# 字段名称
name_pattern = r"`(\w+)`"
varname_list = re.findall(name_pattern, self.create_table_mysql)
# 字段注释
comment_pattern = r"COMMENT \'(.*)\'"
varcomment_list = re.findall(comment_pattern, self.create_table_mysql)
# 字段类型
type_pattern = r"` (.*) COMMENT"
type_list = re.findall(type_pattern, self.create_table_mysql)
# 表注释
tcomment_pattern = r"COMMENT='(.*)'"
tcomment_list = re.findall(tcomment_pattern, self.create_table_mysql)
return varname_list, varcomment_list, type_list, tcomment_list
def change_type(self,cloumns_type):
'''
mysql字段类型转化成hive字段类型
# type: mysql => hive sql
'''
if re.search(r'int', cloumns_type) != None:
return 'BIGINT'
elif re.search(r'decimal', cloumns_type) != None:
format = re.search("decimal\(.*?\)", cloumns_type)
return 'decimal' if format is None else format.group()
elif re.search(r'varchar', cloumns_type) != None:
return 'STRING'
elif re.search(r'timestamp', cloumns_type) != None:
return 'TIMESTAMP'
elif re.search(r'text', cloumns_type) != None:
return 'STRING'
elif re.search(r'datetime', cloumns_type) != None:
return 'STRING'
elif re.search(r'date', cloumns_type) != None:
return 'STRING'
elif re.search(r'char', cloumns_type) != None:
return 'STRING'
elif re.search(r'mediumint', cloumns_type) != None:
return 'INT'
elif re.search(r'tinyint', cloumns_type) != None:
return 'BIGINT'
elif re.search(r'smallint', cloumns_type) != None:
return 'BIGINT'
elif re.search(r'bigint', cloumns_type) != None:
return 'BIGINT'
elif re.search(r'double', cloumns_type) != None:
return 'DOUBLE'
elif re.search(r'float', cloumns_type) != None:
return 'DOUBLE'
else:
return cloumns_type
def mysql_to_hsql(self):
"""
生成Hive建表语句
:return:
"""
varname_list, varcomment_list, type_list, tcomment_list = self.analyse_mysql_create_table()
hive_sql = "drop table if exists dwd.dwd_hbase_"+ str(varname_list[0] + ";\n") +"create external table if not exists dwd.dwd_hbase_" + str(varname_list[0] + "(\n")+" key STRING comment \"rowkey\", \n"
for i in range(len(varname_list) - 2):
if i != len(varname_list) - 3:
hive_sql = hive_sql + " " + str(varname_list[i + 1]) + " " + str(
self.change_type(type_list[i])) + " comment \"" + str(varcomment_list[i]) + "\",\n"
else:
hive_sql = hive_sql + " " + str(varname_list[i + 1]) + " " + str(
self.change_type(type_list[i])) + " comment \"" + str(varcomment_list[i]) + "\"\n"
hive_sql = hive_sql + ") comment \"" + str(tcomment_list[0]) + "\"\n"+" STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'" +"\n WITH SERDEPROPERTIES (\"hbase.columns.mapping\" = \":key,"
for i in range(len(varname_list) - 2):
if i != len(varname_list) - 3:
hive_sql = hive_sql + "cf1:" + str(varname_list[i + 1]) + ","
else:
hive_sql = hive_sql + "cf1:" + str(varname_list[i + 1]) + "\")\n"
hive_sql = hive_sql + " TBLPROPERTIES (\"hbase.table.name\" = \"dwd:hbase_"+str(varname_list[0])+"\", \"hbase.mapred.output.outputtable\" = \"hbase_"+str(varname_list[0])+"\");"
print(hive_sql)
# 同样读取mysql建表语句
with open('../mysql_table_sql.sql', 'r', encoding='utf-8') as file:
create_table_sql = str(file.read())
cts=CreateTableSql(create_table_sql)
cts.mysql_to_hsql()
已创建hbase表后,创建phoenix映射表
希望通过phoenix创建hbase映射表来操作hbase数据,希望生成的sql如下
create table "dwd"."hbase_t_park_day_report"(
"key" varchar NOT null primary key ,
"cf1"."id" varchar,
"cf1"."statistics_date" varchar,
"cf1"."wait_duration1" varchar,
"cf1"."handle_duration1" varchar,
"cf1"."fulfillment_rate1" varchar,
"cf1"."wait_duration2" varchar,
"cf1"."handle_duration2" varchar,
"cf1"."fulfillment_rate2" varchar,
"cf1"."fulfillment_rate" varchar
) column_encoded_bytes=0;
-- 这时候一定要column_encoded_bytes=0结尾,否则创建后查看不到hbase表数据
需要先通过phoenix创建一个同名的命名空间,然后在运行以上建表sql,即可与hbase表进行关联,可以插入删除数据,如果删除表后,hbase表也会同步删除。
py脚本如下
# 创建hbase hive外部表
import re
class CreateTableSql():
def __init__(self,sql):
self.create_table_mysql=sql
def analyse_mysql_create_table(self):
"""
解析mysql建表语句:
:param create_table_sql:
:return: 字段名称、字段注释、字段类型、表注释
"""
# 字段名称
name_pattern = r"`(\w+)`"
varname_list = re.findall(name_pattern, self.create_table_mysql)
# 字段注释
comment_pattern = r"COMMENT \'(.*)\'"
varcomment_list = re.findall(comment_pattern, self.create_table_mysql)
# 字段类型
type_pattern = r"` (.*) COMMENT"
type_list = re.findall(type_pattern, self.create_table_mysql)
# 表注释
tcomment_pattern = r"COMMENT='(.*)'"
tcomment_list = re.findall(tcomment_pattern, self.create_table_mysql)
return varname_list, varcomment_list, type_list, tcomment_list
def change_type(self,cloumns_type):
'''
mysql字段类型转化成hive字段类型
# type: mysql => hive sql
'''
if re.search(r'int', cloumns_type) != None:
return 'varchar'
elif re.search(r'decimal', cloumns_type) != None:
format = re.search("decimal\(.*?\)", cloumns_type)
return 'varchar'
elif re.search(r'varchar', cloumns_type) != None:
return 'varchar'
elif re.search(r'timestamp', cloumns_type) != None:
return 'varchar'
elif re.search(r'text', cloumns_type) != None:
return 'varchar'
elif re.search(r'datetime', cloumns_type) != None:
return 'varchar'
elif re.search(r'date', cloumns_type) != None:
return 'varchar'
elif re.search(r'char', cloumns_type) != None:
return 'varchar'
elif re.search(r'mediumint', cloumns_type) != None:
return 'varchar'
elif re.search(r'tinyint', cloumns_type) != None:
return 'varchar'
elif re.search(r'smallint', cloumns_type) != None:
return 'varchar'
elif re.search(r'bigint', cloumns_type) != None:
return 'varchar'
elif re.search(r'double', cloumns_type) != None:
return 'varchar'
elif re.search(r'float', cloumns_type) != None:
return 'varchar'
else:
return cloumns_type
def mysql_to_hsql(self):
"""
生成Hive建表语句
:return:
"""
schema = "dwd"
# schema = "FLINK_REALTIME"
varname_list, varcomment_list, type_list, tcomment_list = self.analyse_mysql_create_table()
# hive_sql = "create table \"" + schema + "\".\"dwd_hbase_" + str(varname_list[0] + "\"(\n")+" \"key\" varchar NOT null primary key comment \"rowkey\" , \n"
hive_sql = "create table \"" + schema + "\".\"hbase_" + str(varname_list[0] + "\"(\n")+" \"key\" varchar NOT null primary key , \n"
for i in range(len(varname_list) - 2):
if i != len(varname_list) - 3:
hive_sql = hive_sql + " \"cf1\".\"" + str(varname_list[i + 1]) + "\" " + str(
# self.change_type(type_list[i])) + " comment '" + str(varcomment_list[i]) + "',\n"
self.change_type(type_list[i])) + ",\n"
else:
hive_sql = hive_sql + " \"cf1\".\"" + str(varname_list[i + 1]) + "\" " + str(
# self.change_type(type_list[i])) + " comment '" + str(varcomment_list[i]) + "'\n"
self.change_type(type_list[i])) + "\n"
# hive_sql = hive_sql + ") comment \"" + str(tcomment_list[0]) + "\" "+" column_encoded_bytes=0;"
hive_sql = hive_sql + ") column_encoded_bytes=0;"
print(hive_sql)
with open('../mysql_table_sql.sql', 'r', encoding='utf-8') as file:
create_table_sql = str(file.read())
cts=CreateTableSql(create_table_sql)
cts.mysql_to_hsql()
mysql建表语句转换成sqlserver建表语句并创建字段注释
根据已有的建表语句创建sqlserver建表语句,自定义字符排序规则和大小写。
想要生成的sqlserver建表语句如下,小写
CREATE TABLE [dbo].[t_park_day_report] (
[id] BIGINT NOT NULL ,
[statistics_date] nvarchar(20) COLLATE Chinese_PRC_CI_AS NULL ,
[wait_duration1] INT NULL ,
[handle_duration1] INT NULL ,
[fulfillment_rate1] decimal(5,2) NULL ,
[wait_duration2] INT NULL ,
[handle_duration2] INT NULL ,
[fulfillment_rate2] decimal(5,2) NULL ,
[fulfillment_rate] decimal(5,2) NULL ,
CONSTRAINT [PK__t_park___D6TNN1E8GP4GMEU0] PRIMARY KEY CLUSTERED ([id])
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
)
execute sp_addextendedproperty 'MS_Description','日报统计表','user','dbo','table','t_park_day_report',null,null
execute sp_addextendedproperty 'MS_Description','主键','user','dbo','table','t_park_day_report','column','id'
execute sp_addextendedproperty 'MS_Description','统计日期','user','dbo','table','t_park_day_report','column','statistics_date'
execute sp_addextendedproperty 'MS_Description','9.6米上预约车辆等待时长(驶出-签到)','user','dbo','table','t_park_day_report','column','wait_duration1'
execute sp_addextendedproperty 'MS_Description','9.6米上装卸时长(驶出-驶入)','user','dbo','table','t_park_day_report','column','handle_duration1'
execute sp_addextendedproperty 'MS_Description','9.6米上预约车辆等待履约率','user','dbo','table','t_park_day_report','column','fulfillment_rate1'
execute sp_addextendedproperty 'MS_Description','9.6米下预约车辆等待时长(驶出-签到)','user','dbo','table','t_park_day_report','column','wait_duration2'
execute sp_addextendedproperty 'MS_Description','9.6米下装卸时长(驶出-驶入)','user','dbo','table','t_park_day_report','column','handle_duration2'
execute sp_addextendedproperty 'MS_Description','9.6米下预约车辆等待履约率','user','dbo','table','t_park_day_report','column','fulfillment_rate2'
execute sp_addextendedproperty 'MS_Description','预约车辆等待履约率','user','dbo','table','t_park_day_report','column','fulfillment_rate'
大写
CREATE TABLE [dbo].[T_PARK_DAY_REPORT] (
[ID] INT NOT NULL ,
[STATISTICS_DATE] nvarchar(20) COLLATE Chinese_PRC_CS_AS NULL ,
[WAIT_DURATION1] INT NULL ,
[HANDLE_DURATION1] INT NULL ,
[FULFILLMENT_RATE1] decimal(5,2) NULL ,
[WAIT_DURATION2] INT NULL ,
[HANDLE_DURATION2] INT NULL ,
[FULFILLMENT_RATE2] decimal(5,2) NULL ,
[FULFILLMENT_RATE] decimal(5,2) NULL ,
CONSTRAINT [PK__T_PARK___LFZXHO1NACL7P6RL] PRIMARY KEY CLUSTERED ([ID])
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
)
execute sp_addextendedproperty 'MS_Description','日报统计表','user','dbo','table','T_PARK_DAY_REPORT',null,null
execute sp_addextendedproperty 'MS_Description','主键','user','dbo','table','T_PARK_DAY_REPORT','column','ID'
execute sp_addextendedproperty 'MS_Description','统计日期','user','dbo','table','T_PARK_DAY_REPORT','column','STATISTICS_DATE'
execute sp_addextendedproperty 'MS_Description','9.6米上预约车辆等待时长(驶出-签到)','user','dbo','table','T_PARK_DAY_REPORT','column','WAIT_DURATION1'
execute sp_addextendedproperty 'MS_Description','9.6米上装卸时长(驶出-驶入)','user','dbo','table','T_PARK_DAY_REPORT','column','HANDLE_DURATION1'
execute sp_addextendedproperty 'MS_Description','9.6米上预约车辆等待履约率','user','dbo','table','T_PARK_DAY_REPORT','column','FULFILLMENT_RATE1'
execute sp_addextendedproperty 'MS_Description','9.6米下预约车辆等待时长(驶出-签到)','user','dbo','table','T_PARK_DAY_REPORT','column','WAIT_DURATION2'
execute sp_addextendedproperty 'MS_Description','9.6米下装卸时长(驶出-驶入)','user','dbo','table','T_PARK_DAY_REPORT','column','HANDLE_DURATION2'
execute sp_addextendedproperty 'MS_Description','9.6米下预约车辆等待履约率','user','dbo','table','T_PARK_DAY_REPORT','column','FULFILLMENT_RATE2'
execute sp_addextendedproperty 'MS_Description','预约车辆等待履约率','user','dbo','table','T_PARK_DAY_REPORT','column','FULFILLMENT_RATE'
py脚本如下,大小写,只需要在拼接字段时添加函数即可,大写.upper(),小写.lower()
# 根据mysql建表语句创建 sqlserver建表语句 并添加注释 字段全大写
#Chinese_PRC_CS_AI 区分大小写 不区分重音符号 不区分全角半角
#Chinese_PRC_CS_AS 区分大小写 区分重音符号 区分全角半角
#Chinese_PRC_CI_AI 不区分大小写 不区分重音符号 不区分全角半角
#Chinese_PRC_CI_AS 不区分大小写 区分重音符号 区分全角半角
import re
import random,string
#生成随机字符加数字
def generate_random_str(randomlength=16):
"""
生成一个指定长度的随机字符串
"""
random_str = ''
base_str = '0123456789ABCDEFGHIGKLMNOPQRSTUVWXYZ'
length = len(base_str) - 1
for i in range(randomlength):
random_str += base_str[random.randint(0, length)]
return random_str
class CreateTableSql():
def __init__(self,sql):
self.create_table_mysql=sql
def analyse_mysql_create_table(self):
"""
解析mysql建表语句:
:param create_table_sql:
:return: 字段名称、字段注释、字段类型、表注释
"""
# 字段名称
name_pattern = r"`(\w+)`"
varname_list = re.findall(name_pattern, self.create_table_mysql)
# 字段注释
comment_pattern = r"COMMENT \'(.*)\'"
varcomment_list = re.findall(comment_pattern, self.create_table_mysql)
# 字段类型
type_pattern = r"` (.*) COMMENT"
type_list = re.findall(type_pattern, self.create_table_mysql)
# 表注释
tcomment_pattern = r"COMMENT='(.*)'"
tcomment_list = re.findall(tcomment_pattern, self.create_table_mysql)
return varname_list, varcomment_list, type_list, tcomment_list
def change_type(self,cloumns_type):
'''
mysql字段类型转化成sqlserver字段类型
# type: mysql => sqlserver sql
'''
if re.search(r'int', cloumns_type) != None:
return 'INT'
elif re.search(r'decimal', cloumns_type) != None:
format = re.search("decimal\(.*?\)", cloumns_type)
return 'decimal' if format is None else format.group()
elif re.search(r'varchar', cloumns_type) != None:
format = re.search("varchar\(.*?\)", cloumns_type)
#Chinese_PRC_CS_AS
#Chinese_PRC_CI_AS
# 排序规则在这里设置修改
string = format.group()+" COLLATE Chinese_PRC_CS_AS "
return 'nvarchar COLLATE Chinese_PRC_CS_AS' if format is None else string.replace("varchar","nvarchar")
elif re.search(r'timestamp', cloumns_type) != None:
return 'datetime'
elif re.search(r'text', cloumns_type) != None:
return 'text'
elif re.search(r'datetime', cloumns_type) != None:
return 'datetime'
elif re.search(r'date', cloumns_type) != None:
return 'date'
elif re.search(r'time', cloumns_type) != None:
return 'time'
elif re.search(r'char', cloumns_type) != None:
return 'char'
elif re.search(r'mediumint', cloumns_type) != None:
return 'INT'
elif re.search(r'tinyint', cloumns_type) != None:
return 'tinyint'
elif re.search(r'smallint', cloumns_type) != None:
return 'BIGINT'
elif re.search(r'bigint', cloumns_type) != None:
return 'BIGINT'
elif re.search(r'double', cloumns_type) != None:
return 'DOUBLE'
elif re.search(r'float', cloumns_type) != None:
return 'DOUBLE'
else:
return cloumns_type
def mysql_to_hsql(self):
"""
生成sqlserver建表语句
:return:
"""
varname_list, varcomment_list, type_list, tcomment_list = self.analyse_mysql_create_table()
sqlserver_sql = "CREATE TABLE [dbo].[" + str(varname_list[0].upper() + "] (\n")
for i in range(len(varname_list) - 2):
if i == 0:
sqlserver_sql = sqlserver_sql + " [" + str(varname_list[i + 1].upper()) + "] " + str(
self.change_type(type_list[i])) + " NOT NULL ,\n"
else:
sqlserver_sql = sqlserver_sql + " [" + str(varname_list[i + 1].upper()) + "] " + str(
self.change_type(type_list[i])) + " NULL ,\n"
sqlserver_sql = sqlserver_sql + "CONSTRAINT [PK__"+varname_list[0].upper()[:7]+"__"+generate_random_str()+"] PRIMARY KEY CLUSTERED (["+str(varname_list[1].upper())+"])" +"\n"
sqlserver_sql = sqlserver_sql + "WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) \n)\n"
#添加表中文注释
sqlserver_sql = sqlserver_sql +"execute sp_addextendedproperty 'MS_Description','"+tcomment_list[0]+"','user','dbo','table','"+varname_list[0].upper()+"',null,null \n"
#添加字段中文注释
for i in range(len(varname_list) - 2):
sqlserver_sql = sqlserver_sql +"execute sp_addextendedproperty 'MS_Description','"+varcomment_list[i]+"','user','dbo','table','"+varname_list[0].upper()+"','column','"+varname_list[i+1].upper()+"' \n"
print(sqlserver_sql)
# for i in range(len(varname_list) - 2):
# print(type_list[i])
with open('../mysql_table_sql.sql', 'r', encoding='utf-8') as file:
create_table_sql = str(file.read())
cts=CreateTableSql(create_table_sql)
cts.mysql_to_hsql()
快速提取mysql建表语句中的字段名称,数据类型和注释
py提取字段名称脚本
需要实现的效果如下
id
statistics_date
wait_duration1
handle_duration1
fulfillment_rate1
wait_duration2
handle_duration2
fulfillment_rate2
fulfillment_rate
# 获取mysql 字段
import re
class CreateTableSql():
def __init__(self,sql):
self.create_table_mysql=sql
def analyse_mysql_create_table(self):
"""
解析mysql建表语句:
:param create_table_sql:
:return: 字段名称、字段注释、字段类型、表注释
"""
# 字段名称
name_pattern = r"`(\w+)`"
varname_list = re.findall(name_pattern, self.create_table_mysql)
#return varname_list, varcomment_list, type_list, tcomment_list
return varname_list
def mysql_to_hsql(self):
"""
生成Hive建表语句
:return:
"""
varname_list = self.analyse_mysql_create_table()
hive_sql = ""
for i in range(len(varname_list) - 2):
if i != len(varname_list) - 3:
hive_sql = hive_sql + str(varname_list[i + 1]) + "\n"
else:
hive_sql = hive_sql + str(varname_list[i + 1])+ "\n"
#hive_sql = hive_sql + "from "
print(hive_sql)
with open('../mysql_table_sql.sql', 'r', encoding='utf-8') as file:
create_table_sql = str(file.read())
cts=CreateTableSql(create_table_sql)
cts.mysql_to_hsql()
py提取字段类型脚本
效果如下
bigint
varchar
int
int
decimal
int
int
decimal
decimal
# 根据mysql建表语句提取 字段类型
import re
class CreateTableSql():
def __init__(self,sql):
self.create_table_mysql=sql
def analyse_mysql_create_table(self):
"""
解析mysql建表语句:
:param create_table_sql:
:return: 字段名称、字段注释、字段类型、表注释
"""
# 字段类型
type_pattern = r"`\s+([a-zA-Z0-9].*)\s+"
type_list = re.findall(type_pattern, self.create_table_mysql)
return type_list
def change_type(self,cloumns_type):
'''
mysql字段类型转化成hive字段类型
# type: mysql => hive sql
'''
if re.search(r'bigint', cloumns_type) != None:
return 'bigint'
elif re.search(r'tinyint\(', cloumns_type) != None:
return 'tinyint'
elif re.search(r'decimal', cloumns_type) != None:
#format = re.search("decimal\(.*?\)", cloumns_type)
#return 'decimal' if format is None else format.group()
return 'decimal'
elif re.search(r'varchar', cloumns_type) != None:
return 'varchar'
elif re.search(r'timestamp', cloumns_type) != None:
return 'timestamp'
elif re.search(r'text', cloumns_type) != None:
return 'text'
elif re.search(r'datetime', cloumns_type) != None:
return 'datetime'
elif re.search(r'time', cloumns_type) != None:
return 'time'
elif re.search(r'date', cloumns_type) != None:
return 'date'
elif re.search(r'char', cloumns_type) != None:
return 'char'
elif re.search(r'mediumint', cloumns_type) != None:
return 'mediumint'
elif re.search(r'tinyint\(', cloumns_type) != None:
return 'tinyint'
elif re.search(r'smallint', cloumns_type) != None:
return 'smallint'
elif re.search(r'int\(', cloumns_type) != None:
return 'int'
elif re.search(r'double', cloumns_type) != None:
return 'double'
elif re.search(r'float', cloumns_type) != None:
return 'float'
else:
return cloumns_type
def mysql_to_hsql(self):
"""
生成Hive建表语句
:return:
"""
# type_list = self.analyse_mysql_create_table()
type = self.analyse_mysql_create_table()
#hive_sql = "insert overwrite table dwd."+ str(varname_list[0] + "\n") + "select \n"
hive_sql = ""
for i in range(len(type)):
#if i != len(varname_list) - 3:
#hive_sql = hive_sql + self.change_type(type_list[i]) + "\n"
#else:
hive_sql = hive_sql + self.change_type(type[i])+ "\n"
#hive_sql = hive_sql + "from "
print(hive_sql)
with open('../mysql_table_sql.sql', 'r', encoding='utf-8') as file:
create_table_sql = str(file.read())
cts=CreateTableSql(create_table_sql)
cts.mysql_to_hsql()
py提取字段注释脚本
效果如下
主键
统计日期
9.6米上预约车辆等待时长(驶出-签到)
9.6米上装卸时长(驶出-驶入)
9.6米上预约车辆等待履约率
9.6米下预约车辆等待时长(驶出-签到)
9.6米下装卸时长(驶出-驶入)
9.6米下预约车辆等待履约率
预约车辆等待履约率
py脚本
# 获取mysql表 字段注释
import re
class CreateTableSql():
def __init__(self,sql):
self.create_table_mysql=sql
def analyse_mysql_create_table(self):
"""
解析mysql建表语句:
:param create_table_sql:
:return: 字段名称、字段注释、字段类型、表注释
"""
# 字段注释
comment_pattern = r"COMMENT \'(.*)\'"
varcomment_list = re.findall(comment_pattern, self.create_table_mysql)
return varcomment_list
def mysql_to_hsql(self):
"""
生成Hive建表语句
:return:
"""
varcomment_list = self.analyse_mysql_create_table()
#hive_sql = "insert overwrite table dwd."+ str(varname_list[0] + "\n") + "select \n"
hive_sql = ""
for i in range(len(varcomment_list)):
hive_sql = hive_sql + str(varcomment_list[i])+ "\n"
#hive_sql = hive_sql + "from "
print(hive_sql)
# txt文件的中文编码是GBK py默认的是utf_8 读取的时候需要指定编码格式
with open('../mysql_table_sql.sql', 'r', encoding='utf-8') as file:
create_table_sql = str(file.read())
cts=CreateTableSql(create_table_sql)
cts.mysql_to_hsql()