Kafka source
CREATE TABLE `kafka_log`.`kafka_topic_log` (
`common` ROW<ar STRING,ba STRING,ch STRING,is_new STRING,md STRING,mid STRING,os STRING,uid STRING,vc STRING>,
`page` ROW<during_time STRING,item STRING,item_type STRING,last_page_id STRING,page_id STRING,source_type STRING> ,
`actions` ARRAY<ROW<action_id STRING,item STRING,item_type STRING,ts BIGINT>>,
`displays` ARRAY<ROW<display_type STRING,item STRING,item_type STRING,`order` STRING,pos_id STRING>>,
`start` ROW<entry STRING,loading_time BIGINT,open_ad_id BIGINT,open_ad_ms BIGINT,open_ad_skip_ms BIGINT>,
`err` ROW<error_code BIGINT,msg STRING>,
`ts` BIGINT
) WITH (
'connector' = 'kafka',
'topic' = 'topic_log',
'properties.bootstrap.servers' = 'hadoop1:9092',
'properties.group.id' = 'hudi_source',
'scan.startup.mode' = 'earliest-offset',
'format' = 'json',
--whether to fail if a field is missing from the JSON
'json.fail-on-missing-field'='false',
--whether to ignore errors when parsing the JSON
'json.ignore-parse-errors' = 'true'
);
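To sanity-check the source, the nested ROW fields can be queried directly. A minimal sketch (the column picks are arbitrary examples from the schema above):
SELECT common.mid, page.page_id, ts
FROM kafka_log.kafka_topic_log;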
Hudi sink + Hive sync
CREATE TABLE `hudi_ods`.`ods_log` (
`uuid` STRING,
`common` ROW<ar STRING,ba STRING,ch STRING,is_new STRING,md STRING,mid STRING,os STRING,uid STRING,vc STRING>,
`page` ROW<during_time STRING,item STRING,item_type STRING,last_page_id STRING,page_id STRING,source_type STRING> ,
`actions` ARRAY<ROW<action_id STRING,item STRING,item_type STRING,ts BIGINT>>,
`displays` ARRAY<ROW<display_type STRING,item STRING,item_type STRING,`order` STRING,pos_id STRING>>,
`start` ROW<entry STRING,loading_time BIGINT,open_ad_id BIGINT,open_ad_ms BIGINT,open_ad_skip_ms BIGINT>,
`err` ROW<error_code BIGINT,msg STRING>,
`ts` BIGINT,
`dt` STRING,
`t` AS TO_TIMESTAMP(FROM_UNIXTIME(ts/1000,'yyyy-MM-dd HH:mm:ss')),
WATERMARK FOR `t` AS `t` - INTERVAL '5' SECOND
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop1:8020/user/hudi/warehouse/hudi_ods/ods_log',
'table.type'='MERGE_ON_READ',
--the log table has no primary key field, so uuid serves as the unique record key; it can be filled with the built-in uuid() function on insert
'hoodie.datasource.write.recordkey.field' = 'uuid',
--precombine field: picks the newest record when keys collide; with uuid keys it has little effect
'hoodie.datasource.write.precombine.field' = 'ts',
--bucket-assign parallelism; changing this value changes the number of files written out
'write.bucket_assign.tasks'='1',
--write parallelism: set to 1 here to reduce resource usage
'write.tasks' = '1',
--compaction parallelism
'compaction.tasks' = '1',
--run compaction asynchronously
'compaction.async.enabled' = 'true',
--let this job schedule compaction plans
'compaction.schedule.enabled' = 'true',
--trigger strategy: by number of commits
'compaction.trigger.strategy' = 'num_commits',
--compact every 5 delta commits
'compaction.delta_commits' = '5',
--enable streaming read
'read.streaming.enabled' = 'true',
--skip compaction commits when streaming, to avoid consuming the same records twice
'read.streaming.skip_compaction' = 'true',
--enable Hive sync
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop1:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_log'
);
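The job feeding this table has to supply the uuid record key and the dt partition value itself. A minimal sketch of that INSERT, assuming dt is the event date derived from ts (the 'yyyy-MM-dd' format is an assumption; the computed column t is derived automatically, so it is not listed):
INSERT INTO hudi_ods.ods_log
SELECT
  UUID(),  --the built-in function mentioned above, fills the record key
  common, page, actions, displays, `start`, err, ts,
  FROM_UNIXTIME(ts/1000,'yyyy-MM-dd')  --partition value dt
FROM kafka_log.kafka_topic_log;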
When inserting: if a NULL value is written to a column that Flink treats as NOT NULL, the job fails by default (ERROR). To drop such rows instead, set:
set table.exec.sink.not-null-enforcer=drop;
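After Hive sync runs, a MERGE_ON_READ table shows up in Hive as two tables: hive_ods.ods_log_ro (read-optimized, compacted base files only) and hive_ods.ods_log_rt (real-time, merges delta logs on read). A sketch of querying the real-time view from Hive, with a placeholder dt value (Hive also needs the hudi-hadoop-mr bundle on its classpath):
SELECT common.mid, ts
FROM hive_ods.ods_log_rt
WHERE dt = '2022-06-14'
LIMIT 10;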