场景:Kafka 数据实时落地到 Doris,Doris 与维表做关联查询,并可通过 Datart、Superset 等 BI 工具进行展示。
Doris DDL 建表:
-- Doris sink table for real-time sale order lines streamed from Kafka.
-- DUPLICATE KEY model on (dt, bill_code); hash-distributed on bill_code into 10 buckets.
-- FIX: removed the trailing comma after the last column (a syntax error in Doris DDL)
-- and the doubled ';;' statement terminator.
CREATE TABLE `ods_sale_tr_goods_stream` (
`dt` CHAR(8) NOT NULL,
`bill_code` VARCHAR(100) NOT NULL COMMENT '-- 订单号 transnumber',
`prykey` VARCHAR(100) NULL,
`pt` datetime NULL,
`ts` datetime NULL,
`retailnumber` VARCHAR(10) NULL COMMENT '-- 流水号',
`dates` date NULL COMMENT '-- 订单日期',
`businessdaydate` CHAR(8) NULL COMMENT '-- 订单日期',
`gj_time` CHAR(6) NULL COMMENT '-- 订单时间',
`goods_id` VARCHAR(50) NULL COMMENT '-- 商品id materialnumber01',
`org_no` VARCHAR(10) NULL COMMENT '-- 门店id retailstoreid',
`sale_man_id` VARCHAR(100) NULL COMMENT '-- 营业员编码 commemployeeid01',
`disctypecode` VARCHAR(20) NULL COMMENT '-- 折扣类型',
`batch_code` VARCHAR(100) NULL COMMENT '-- 批次号 batchid',
`tendertypecode` VARCHAR(20) NULL COMMENT '-- 付款方式',
`operatorid` VARCHAR(100) NULL COMMENT '-- 收银员',
`channel_order_id` VARCHAR(100) NULL COMMENT '-- 第三方单号 customernumber',
`origtransnumber` VARCHAR(100) NULL COMMENT '-- 原单号',
`refund_no` VARCHAR(10) NULL COMMENT '-- 退货标识 thbs',
`chnl` VARCHAR(100) NULL COMMENT '-- 渠道'
) ENGINE=OLAP
DUPLICATE KEY(`dt`, `bill_code`)
COMMENT 'car实时订单'
DISTRIBUTED BY HASH(`bill_code`) BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"is_being_synced" = "false",
"storage_medium" = "hdd",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false"
);
FlinkSQL 作业:
-- Flink SQL session setup for the Kafka -> Doris ingestion job.
-- Register the Hive catalog (persists the Flink table definitions below).
CREATE CATALOG hive WITH (
'type' = 'hive',
'hive-conf-dir' = '/opt/conf',
'hadoop-conf-dir'='/etc/hadoop/conf'
);
USE CATALOG hive;
USE dl_test;
-- Silently drop rows that violate the sink's NOT NULL constraints instead of failing the job.
SET 'table.exec.sink.not-null-enforcer'='drop';
-- Tabular result display in the SQL client (interactive debugging only).
SET 'sql-client.execution.result-mode'='TABLEAU';
-- Kafka source table. Reset the consumer-group offsets first if replaying, e.g.:
-- kafka-consumer-groups --bootstrap-server xxxxxxxxxxx:9092 --group prod_flink_group_doris --reset-offsets --topic xxxxxxxxxx --to-datetime 2024-09-24T16:00:00.000 --execute
-- kafka-console-consumer --bootstrap-server xxxxxxxxxxxx:9092 --topic bdp_datax_sapabap1_wormhole --consumer-property group.id=prod_flink_group_doris
drop TABLE IF EXISTS `hive`.`dl_source_kafka`.`ods_kafka_sale_tr_goods_stream_for_doris`;
CREATE TABLE `hive`.`dl_source_kafka`.`ods_kafka_sale_tr_goods_stream_for_doris` (
-- Processing-time attribute: the wall-clock instant each record is read.
`pt` as proctime(),
`prykey` string,
`retailnumber` string,
`transnumber` string,
`businessdaydate` string,
`gj_time` string,
`materialnumber01` string,
`retailstoreid` string,
`commemployeeid01` string,
`disctypecode` string,
`batchid` string,
`tendertypecode` string,
`operatorid` string,
`customernumber` string,
`origtransnumber` string,
`thbs` string,
`chnl` string,
-- Event time rebuilt from the yyyyMMdd business date plus the HHmmss time field.
`ts` AS TO_TIMESTAMP(CONCAT(CAST(TO_DATE(`businessdaydate`, 'yyyyMMdd') AS STRING), ' ', CONCAT_WS(':', SUBSTR(`gj_time`, 1, 2), SUBSTR(`gj_time`, 3, 2), SUBSTR(`gj_time`, 5, 2))))
)WITH (
'properties.auto.commit.interval.ms' = '1000',
'canal-json.ignore-parse-errors' = 'true',
'format' = 'canal-json',
'properties.bootstrap.servers' = 'xxxxxxxxxxx:9092',
'connector' = 'kafka',
'topic' = 'bdp_datax_sapabap1_wormhole',
'scan.startup.mode' = 'group-offsets',
-- FIX: the Kafka consumer config key is 'auto.offset.reset'; the previous
-- 'properties.auto.offset.reset.strategy' is not a recognized client property and was
-- silently ignored, so a brand-new group would fall back to the client default ('latest').
'properties.auto.offset.reset' = 'earliest',
'properties.group.id' = 'prod_flink_group_doris',
'properties.enable.auto.commit' = 'true'
);
-- Doris sink table (v2) mapping onto the physical table gjst.ods_sale_tr_goods_stream.
drop TABLE IF EXISTS `hive`.`dl_ods`.`ods_doris_sale_tr_goods_stream`;
CREATE TABLE IF NOT EXISTS `hive`.`dl_ods`.`ods_doris_sale_tr_goods_stream` (
`dt` string,
`bill_code` string,
`prykey` string,
`pt` TIMESTAMP,
`ts` TIMESTAMP,
`retailnumber` string,
`dates` DATE,
`businessdaydate` string,
`gj_time` string,
`goods_id` string,
`org_no` string,
`sale_man_id` string,
`disctypecode` string,
`batch_code` string,
`tendertypecode` string,
`operatorid` string,
`channel_order_id` string,
`origtransnumber` string,
`refund_no` string,
`chnl` string
)
WITH (
'connector' = 'doris',
'fenodes' = 'xxxxxxxxxx:8030',
'table.identifier' = 'gjst.ods_sale_tr_goods_stream',
'username' = 'xxxx',
'password' = 'xxxx',
'sink.label-prefix' = 'car-kafka-doris-20240925-3' -- must be unique for every job (re)start
);
-- The streaming job: continuously copy Kafka records into the Doris sink,
-- renaming source columns to the warehouse naming convention.
INSERT INTO `hive`.`dl_ods`.`ods_doris_sale_tr_goods_stream`
SELECT
businessdaydate as dt,
`transnumber` as bill_code,
`prykey`,
proctime() as pt, -- ingestion (processing) time
`ts`, -- business event time
`retailnumber`,
-- Partition date derived from the order's event time.
-- FIX: cast the timestamp to DATE directly instead of the fragile
-- timestamp -> string -> TO_DATE round-trip, which relied on lenient format parsing.
CAST(`ts` AS DATE) as dates,
`businessdaydate`,
`gj_time`,
`materialnumber01` as goods_id,
`retailstoreid` as org_no,
`commemployeeid01` as sale_man_id,
`disctypecode`,
`batchid` as batch_code,
`tendertypecode`,
`operatorid`,
`customernumber` as channel_order_id,
`origtransnumber`,
`thbs` as refund_no,
`chnl`
FROM
`hive`.`dl_source_kafka`.`ods_kafka_sale_tr_goods_stream_for_doris`
;