首先,因为我工作是使用最多的是MySQL数据库,所有该方式仅适用于MySQL,其他方法可以类似处理即可,废话不多说,直接上代码:SET SESSION group_concat_max_len = 102400;
SELECT
a.TABLE_NAME ,
b.TABLE_COMMENT ,
concat('DROP TABLE IF EXISTS ','tb_ods_',a.TABLE_NAME,';',CHAR(10 USING utf8),
'CREATE EXTERNAL TABLE IF NOT EXISTS ','tb_ods_',a.TABLE_NAME ,' (',CHAR(10 USING utf8),
group_concat(
concat(a.COLUMN_NAME,' ',
(case when a.data_type='bigint' then 'bigint'
when a.data_type='binary' then 'binary'
when a.data_type='char' then 'string'
when a.data_type='date' then 'string'
when a.data_type='datetime' then 'string'
when a.data_type='decimal' then concat('decimal','(',a.NUMERIC_PRECISION,',',a.NUMERIC_SCALE,')')
when a.data_type='double' then 'double'
when a.data_type='enum' then 'string'
when a.data_type='float' then 'double'
when a.data_type='int' then 'int'
when a.data_type='json' then 'map<string,string>'
when a.data_type='longtext' then 'string'
when a.data_type='mediumtext' then 'string'
when a.data_type='smallint' then 'int'
when a.data_type='text' then 'string'
when a.data_type='time' then 'string'
when a.data_type='timestamp' then 'string'
when a.data_type='tinyint' then 'int'
when a.data_type='varbinary' then 'binary'
when a.data_type='varchar' then 'string'
else '未知类型'
end)," COMMENT '",COLUMN_COMMENT,"'" ),CHAR(10 USING utf8)
order by a.TABLE_NAME,a.ORDINAL_POSITION) ,
") COMMENT '",b.TABLE_COMMENT ,"'",CHAR(10 USING utf8), "PARTITIONED BY (deal_date string COMMENT '数据日期')
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES ('serialization.null.format' = 'null');",CHAR(10 USING utf8)) AS ods_ddl
FROM
(
SELECT
TABLE_SCHEMA,
TABLE_NAME,
COLUMN_NAME,
ORDINAL_POSITION,
DATA_TYPE,
NUMERIC_PRECISION,
NUMERIC_SCALE,
COLUMN_COMMENT
FROM
information_schema.COLUMNS
WHERE
TABLE_SCHEMA='${DATABASE_NAME}'
AND TABLE_NAME='${TABLE_NAME}'
) AS a
LEFT JOIN
information_schema.TABLES AS b
ON
a.TABLE_NAME=b.TABLE_NAME
AND a.TABLE_SCHEMA=b.TABLE_SCHEMA
where b.TABLE_TYPE='BASE TABLE'
GROUP BY
a.TABLE_NAME,
b.TABLE_COMMENT
;
实际拼接生成的hive DDL效果:DROP TABLE IF EXISTS tb_ods_spider;
CREATE EXTERNAL TABLE IF NOT EXISTS tb_ods_spider (
uscc string COMMENT '统一社会信用代码'
,org_name string COMMENT '企业名称'
,org_code string COMMENT '组织机构代码'
,regist_no string COMMENT '工商注册号'
,TIN string COMMENT '纳税人识别号'
,LAR string COMMENT '法定代表人'
,register_state string COMMENT '登记状态'
,build_date string COMMENT '成立日期'
,register_amount string COMMENT '注册资本'
,paid_amount string COMMENT '实缴资本'
,verify_date string COMMENT '核准日期'
,company_type string COMMENT '企业类型'
,TQ string COMMENT '纳税人资质'
,belong_area string COMMENT '所属地区'
,register_authority string COMMENT '登记机关'
,staff_size string COMMENT '人员规模'
,business_term string COMMENT '营业期限'
,insure_num string COMMENT '参保人数'
,in_out_code string COMMENT '进出口代码'
) COMMENT '企业信息'
PARTITIONED BY (deal_date string COMMENT '数据日期')
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES ('serialization.null.format' = 'null');