format=json:flinksql中的kafka临时表结构参数,需要列出所有需要的字段名,对应埋点json中的key名
format=raw:flinksql中kafka临时表的格式参数,不需要逐一列出与埋点json的key名对应的字段名,而是直接将kafka消息的原始数据读取为一个纯字符串字段
-- Example Kafka-backed table using the 'raw' format: each message of the
-- 'nginx_log' topic is read as-is into the single STRING column `log`,
-- so no per-field schema has to be declared.
CREATE TABLE nginx_log (
    log STRING
) WITH (
    'connector'                    = 'kafka',
    'topic'                        = 'nginx_log',
    'properties.bootstrap.servers' = 'localhost:9092',
    'properties.group.id'          = 'testGroup',
    'format'                       = 'raw'
);
参考官网:Raw | Apache Flink
实际应用:
-- Set the job's checkpointing interval to 5 seconds.
-- NOTE(review): the Doris sink presumably commits its writes on checkpoints,
-- so this interval likely bounds how quickly rows become visible — confirm.
SET 'execution.checkpointing.interval' = '5s';
-- Kafka source table: every message of the topic is exposed unparsed as the
-- single STRING column `log` ('raw' format), and consumption starts from the
-- earliest offset. Topic, broker addresses and group id are placeholders.
CREATE TABLE kafka_all_log_tmp (
    log STRING
) WITH (
    'connector'                    = 'kafka',
    'topic'                        = 'TOPIC_NAME',
    'properties.bootstrap.servers' = 'xxx.xx.xx.xxx:9092,xxx.xx.xx.xxx:9092,xxx.xx.xx.xxx:9092',
    'properties.group.id'          = 'GROUP_NAME',
    'scan.startup.mode'            = 'earliest-offset',
    'format'                       = 'raw'
);
-- User behavior log: Doris sink table.
-- Maps to the Doris table logger.user_active_all_log_kafka and holds a
-- timestamp `ct` plus the raw log line. Host, credentials and label prefix
-- are placeholders.
-- NOTE(review): 'sink.label-prefix' presumably has to be unique per job —
-- confirm against the Doris connector documentation.
CREATE TABLE user_active_all_log_kafka (
    ct  TIMESTAMP,
    log STRING
) WITH (
    'connector'          = 'doris',
    'fenodes'            = 'ipxxx:8200',
    'username'           = 'xxx',
    'password'           = 'xxx',
    'table.identifier'   = 'logger.user_active_all_log_kafka',
    'sink.enable-delete' = 'true',
    'sink.label-prefix'  = 'doris_label-34xxxx'
);
-- Write the user behavior log into Doris.
-- Every raw message read from kafka_all_log_tmp is stored together with the
-- time at which it was processed. The target columns are listed explicitly so
-- the statement does not depend on the column order of the sink table.
INSERT INTO user_active_all_log_kafka (ct, log)
SELECT
    -- Current time at whole-second precision: UNIX_TIMESTAMP() yields epoch
    -- seconds, FROM_UNIXTIME formats them as a string, TO_TIMESTAMP parses
    -- that string back into a timestamp.
    TO_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP())) AS ct,
    log
FROM kafka_all_log_tmp
;