连接PostgreSQL和ClickHouse数据库 - 从PostgreSQL读取多个表数据 - 将数据写入ClickHouse数据库
1. 连接PostgreSQL和ClickHouse数据库
public static void main(String[] args) throws Exception {
//1.获取执行环境及源数据配置
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
Configuration conf = new Configuration();
//设置WebUI绑定的本地端口
conf.setString(RestOptions.BIND_PORT, "8081");
//本地开发环境
// StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
env.enableCheckpointing(5000, CheckpointingMode.AT_LEAST_ONCE);//每个5秒执行一次
// env.setParallelism(1);//并行度2
//读取配置文件properties里面的数据库连接信息
DataBaseConfig config = new DataBaseConfig();
//2.通过flinkcdc构建sourceFunction,三个表分开创建流
SourceFunction<SourcePojo> alarmRec = getSourceFunction(config);
//3.从源数据提取数据插入clickhouse
DataStreamSource<SourcePojo> alarmSource = env.addSource(alarmRec);
//转换数据
SourceData.TransData(alarmSource);
env.execute();
}
/**
* 通过flinkcdc构建sourceFunction
*
* @param config 数据库连接配置文件
* @return 返回流数据
*/
public static SourceFunction<SourcePojo> getSourceFunction(DataBaseConfig config) {
String host = config.SOURCE_HOST;
int port = config.SOURCE_PORT;
String dataBase = config.SOURCE_DATABASE;
String schemaList = config.SOURCE_SCHEMALIST;
String userName = config.SOURCE_USERNAME;
String password = config.SOURCE_PASSWORD;
String pluge = config.SOURCE_PLUGE;
String[] tableName = config.SOURCE_TABLELIST;
String slot = config.SOURCE_SLOTNAME;
//系统配置
Properties properties = new Properties();
//always:全量+增量 never:增量
properties.setProperty("snapshot.mode", "never");
properties.setProperty("debezium.slot.name", "pg_cdc");
properties.setProperty("debezium.slot.drop.on.stop", "true");
properties.setProperty("include.schema.changes", "true");
//使用连接器配置属性启用定期心跳记录生成
properties.setProperty("heartbeat.interval.ms", String.valueOf(DEFAULT_HEARTBEAT_MS));
SourceFunction<SourcePojo> sourceFunction = PostgreSQLSource.<SourcePojo>builder()
.hostname(host)
.port(port)
.database(dataBase)
.schemaList(schemaList)
.tableList(tableName)
.username(userName)
.password(password)
.slotName(slot)
.decodingPluginName(pluge) // use pgoutput for PostgreSQL 10+
.deserializer(new CustomerDeserialization())
.debeziumProperties(properties)
.build();
return sourceFunction;
}
2. 从PostgreSQL读取多个表数据的数据库连接配置,以及clickhouse的连接配置
SOURCE.HOST=192.168.1.1
SOURCE.PORT=5432
SOURCE.DATABASE=data_dev
SOURCE.SCHEMALIST=public
SOURCE.TABLELIST=public.sys_equip_alarm_rec_2024,public.sys_equip_alarm_rec_push_rec_2024,public.sys_equip_alarm_processing_status_rec_2024
SOURCE.USERNAME=postgres
SOURCE.PASSWORD=123
SOURCE.SLOTNAME=flink
SOURCE.PLUGE=pgoutput
CLICKHOUSE.URL=jdbc:clickhouse:http://192.168.1.1:8123/xs
CLICKHOUSE.DRIVENAME=com.clickhouse.jdbc.ClickHouseDriver
CLICKHOUSE.USER=default
CLICKHOUSE.PASSWORD=default-123
3. 将数据写入ClickHouse数据库
/**
* 数据流写入ck
* @param sql 操作的sql
* @return 结果
* @param <T> 操作对象
*/
public static <T>SinkFunction<T> getSink(String sql){
System.out.println(sql+"@@@@@@@@");
DataBaseConfig config=new DataBaseConfig();
return JdbcSink.<T>sink(
sql,
// 对sql语句进行预编译
new JdbcStatementBuilder<T>(){
@Override
public void accept(PreparedStatement preparedStatement, T t) throws SQLException {
Field[] fields=t.getClass().getDeclaredFields();
try {
for (int i = 0; i < fields.length; i++) {
Field f = fields[i];
f.setAccessible(true);
Object value= f.get(t);
preparedStatement.setObject(i+1 , value);
}
}catch (Exception e){
e.printStackTrace();
}
}
},
// 设置批量插入数据
new JdbcExecutionOptions.Builder().withBatchSize(5).build(),
// 设置ClickHouse连接配置
new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
.withDriverName(config.cli