-- Flink lookup-join TPS test: Kafka source joined against HBase (via Phoenix) dimension
-- tables; MySQL / StarRocks variants tested separately.
-- Relevant Phoenix connector classes:
--   org.apache.flink.connector.phoenix.internal.connection.PhoneixJdbcConnectionProvider
--   org.apache.flink.connector.phoenix.internal.connection.PhoneixJdbcConnectionProvider#getOrEstablishConnection
-- Kafka source table carrying raw V2X realtime telemetry as JSON.
CREATE TABLE IF NOT EXISTS v2xRTData (
    device STRING,
    ip STRING,
    -- nested JSON payload; inner `content` row holds the telemetry readings
    payload ROW<
        id STRING,
        type STRING,
        packagesn STRING,
        mac STRING,
        content ROW<
            longitude STRING,
            latitude STRING,
            vehicle_speed STRING,
            vehicle_accel STRING,
            altitude STRING,
            vehicle_angle STRING,
            angle_speed STRING,
            slope STRING,
            star_num STRING,
            acc_state STRING,
            `timestamp` BIGINT,
            avg_speed STRING,
            vehicle_state STRING,
            mileage STRING
        >
    >,
    `time` BIGINT,
    -- processing-time attribute; required by FOR SYSTEM_TIME AS OF lookup joins below
    proc_time AS PROCTIME()
) WITH (
    'connector' = 'kafka',
    'topic' = 'v2xRTData',
    'properties.bootstrap.servers' = 'bigdata-kafka-01:9092,bigdata-kafka-02:9092,bigdata-kafka-03:9092',
    'properties.group.id' = 'dimTestGroup01',
    'scan.startup.mode' = 'earliest-offset',
    'properties.key.deserializer' = 'org.apache.kafka.common.serialization.StringDeserializer',
    'properties.value.deserializer' = 'org.apache.kafka.common.serialization.StringDeserializer',
    'format' = 'json',
    'json.fail-on-missing-field' = 'true',
    'json.ignore-parse-errors' = 'false'
);
-- Configuration parameters are validated by
-- org.apache.flink.connector.phoenix.utils.PhoenixJdbcValidator.
-- Phoenix-backed dimension table (terminal -> car mapping) used as a lookup source.
-- Uses the legacy `connector.*` option style expected by the Phoenix connector.
CREATE TABLE IF NOT EXISTS dim_cap_terminal (
    `rowkey` STRING,
    `terminal_name` STRING,
    `mac` STRING,
    `car_id` STRING,
    PRIMARY KEY (`rowkey`) NOT ENFORCED
) WITH (
    'connector.type' = 'phoenix',
    'connector.url' = 'jdbc:phoenix:bigdata-zookeeper-01,bigdata-zookeeper-02,bigdata-zookeeper-03:2181',
    'connector.table' = 'dim_cap_terminal',
    'connector.driver' = 'org.apache.phoenix.jdbc.PhoenixDriver',
    'connector.username' = '',
    'connector.password' = '',
    'phoenix.schema.isnamespacemappingenabled' = 'true',
    'phoenix.schema.mapsystemtablestonamespace' = 'true',
    'connector.write.flush.max-rows' = '1',
    -- lookup cache: up to 1000 rows, each cached for at most 10s
    'connector.lookup.cache.max-rows' = '1000',
    'connector.lookup.cache.ttl' = '10000'
);
-- Kafka sink table receiving the enriched (joined) records as JSON.
CREATE TABLE dim_v2x_topic (
    ip STRING,
    mac STRING,
    car_id STRING
) WITH (
    'connector' = 'kafka',
    'topic' = 'dim_v2x_topic',
    'properties.bootstrap.servers' = 'bigdata-kafka-01:9092,bigdata-kafka-02:9092,bigdata-kafka-03:9092',
    'properties.group.id' = 'dimTestGroup01',
    'scan.startup.mode' = 'earliest-offset',
    'properties.key.deserializer' = 'org.apache.kafka.common.serialization.StringDeserializer',
    'properties.value.deserializer' = 'org.apache.kafka.common.serialization.StringDeserializer',
    'format' = 'json',
    'json.fail-on-missing-field' = 'true',
    'json.ignore-parse-errors' = 'false'
);
-- Single-dimension lookup join: enrich each telemetry record with its car_id
-- by probing dim_cap_terminal on the terminal MAC at processing time.
INSERT INTO dim_v2x_topic
SELECT
    a.ip,
    a.payload.mac,
    ct.`car_id`
FROM v2xRTData AS a
LEFT JOIN dim_cap_terminal FOR SYSTEM_TIME AS OF a.proc_time AS ct
    ON ct.`mac` = a.payload.mac;
-- Lookup Cache
-- The JDBC connector can be used in a temporal join as a lookup source (aka a
-- dimension table). Currently, only sync lookup mode is supported. By default,
-- the lookup cache is not enabled; you can enable it by setting both
-- lookup.cache.max-rows and lookup.cache.ttl.
-- The lookup cache is used to improve the performance of temporal joins against
-- the JDBC connector. When the cache is disabled, every request is sent to the
-- external database. When the lookup cache is enabled, each process (i.e.
-- TaskManager) holds a cache: Flink looks up the cache first, only sends a
-- request to the external database on a cache miss, and updates the cache with
-- the rows returned. The oldest rows are evicted when the cache reaches
-- lookup.cache.max-rows, or when a row exceeds its maximum time to live,
-- lookup.cache.ttl. Because cached rows might not be the latest, users can tune
-- lookup.cache.ttl to a smaller value for fresher data, but this may increase
-- the number of requests sent to the database. So this is a balance between
-- throughput and correctness.
-- 多维度关联 (multi-dimensional lookup join example)
-- Multi-dimensional lookup join: chains four dimension lookups at processing time
-- (terminal -> train -> platform-train bridge -> service platform).
-- NOTE(review): the SELECT list has 9 columns but the sink `dim_v2x_topic` is
-- declared above with only 3 (ip, mac, car_id) — as written this INSERT would
-- fail validation. Confirm whether a wider sink table was intended.
-- NOTE(review): dim_train, dim_service_platform_train and dim_service_platform
-- have no DDL in this file — presumably defined elsewhere; verify before running.
insert into dim_v2x_topic
select a.payload.mac, t.`car_type`,ct.`car_id`,t.`train_no`,t.`train_name`,t.`vehicle_id`,sp.`platform_name`, sp.`ip` as platform_ip, sp.`port` as platform_port
from
v2xRTData as a
left join
dim_cap_terminal FOR SYSTEM_TIME AS OF a.proc_time AS ct
ON ct.`mac` = a.payload.mac
left join
dim_train FOR SYSTEM_TIME AS OF a.proc_time AS t
ON t.`rowkey` = ct.`car_id`
left join
dim_service_platform_train FOR SYSTEM_TIME AS OF a.proc_time AS spt
ON spt.`train_id` = t.`rowkey`
left join
dim_service_platform FOR SYSTEM_TIME AS OF a.proc_time AS sp
ON sp.`rowkey` = spt.`service_platform_id`
;
-- HBase shell commands used to seed the dimension tables for the test:
hbase(main):002:0> put 'DIM_CAP_TERMINAL','0ab965742e27425bbb73c3d3275d2ccb','INFO:CAR_ID','15e669a1cdfc478581f64031880a594a'
Took 0.1381 seconds
hbase(main):003:0> put 'DIM_TRAIN','15e669a1cdfc478581f64031880a594a','INFO:TRAIN_NO','train_no_0001'
Took 0.0107 seconds
hbase(main):004:0> put 'DIM_CAP_TERMINAL','604106ab9b734daaaeb6504983f7b992','INFO:CAR_ID','15e669a1cdfc478581f64031880a594a'
Took 0.0080 seconds
hbase(main):005:0> put 'DIM_TRAIN','15e669a1cdfc478581f64031880a594a','INFO:TRAIN_NO','train_no_0005'
Took 0.0152 seconds