通过flink将kafka中的数据存储到clickhouse中.
将数据写入 Clickhouse 的测试步骤如下。
(1)添加依赖
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-jdbc_2.11</artifactId>
<version>1.12.7</version>
</dependency>
<dependency>
<groupId>com.clickhouse</groupId>
<artifactId>clickhouse-jdbc</artifactId>
<version>0.3.2</version>
</dependency>
(2)启动 clickhouse,在 test1 库下建表 source_log
-- Target table for the Flink JDBC sink: one row per network-flow record
-- consumed from Kafka. `id` is a UUID generated by the Flink job.
CREATE TABLE test1.source_log
(
`id` String,
`src_ip` String,
`src_port` Int32,
`dst_ip` String,
`dst_port` Int32,
`protocol` String
)
-- MergeTree is the standard ClickHouse engine for append-heavy workloads;
-- rows are physically sorted by the ORDER BY key.
ENGINE = MergeTree
ORDER BY id;
(3)编写输出到 Clickhouse的示例代码
import com.clickhouse.jdbc.ClickHouseDriver;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.flink.WuYuan;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcExecutionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.types.Row;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.util.Properties;
import java.util.UUID;
public class FlinkReadKafka {

    /**
     * Reads JSON records from the "wangluo" Kafka topic, converts each record into a
     * 6-field {@link Row} (a generated UUID id plus five WuYuan fields) and writes it
     * to the ClickHouse table {@code test1.source_log} via the Flink JDBC sink.
     *
     * @param args unused command-line arguments
     * @throws Exception if the Flink job fails to build or execute
     */
    public static void main(String[] args) throws Exception {
        // Jackson's ObjectMapper implements Serializable (since Jackson 2.1), so it
        // may be captured by the map() closure Flink ships to the task managers.
        ObjectMapper objectMapper = new ObjectMapper();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // 1. Source: Kafka consumer configuration.
        Properties properties = new Properties();
        properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "ip:端口号");
        // FlinkKafkaConsumer requires a consumer group id; without "group.id" the
        // connector rejects the configuration at job startup.
        properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "flink-clickhouse-sink");
        properties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        properties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        DataStreamSource<String> sourceStream = env.addSource(
                new FlinkKafkaConsumer<>("wangluo", new SimpleStringSchema(), properties));

        // 2. Transform: parse each JSON string into a WuYuan object and map it to a Row.
        SingleOutputStreamOperator<Row> rows = sourceStream
                .map(jsonStr -> {
                    WuYuan wuYuan = objectMapper.readValue(jsonStr, WuYuan.class);
                    Row row = new Row(6);
                    row.setField(0, UUID.randomUUID().toString()); // synthetic primary key
                    row.setField(1, wuYuan.getSrcIp());
                    row.setField(2, wuYuan.getSrcPort());
                    row.setField(3, wuYuan.getDstIp());
                    row.setField(4, wuYuan.getDstPort());
                    row.setField(5, wuYuan.getProtocol());
                    return row;
                })
                // Row field types cannot be extracted from a lambda; declare them
                // explicitly so Flink does not fall back to generic Kryo serialization.
                .returns(Types.ROW(Types.STRING, Types.STRING, Types.INT,
                                   Types.STRING, Types.INT, Types.STRING));

        // 3. Sink: batched JDBC writes to ClickHouse. Column names are listed
        // explicitly so the statement survives table-schema reordering.
        rows.addSink(JdbcSink.sink(
                "insert into source_log (id, src_ip, src_port, dst_ip, dst_port, protocol) values (?,?,?,?,?,?)",
                (preparedStatement, row) -> {
                    // JDBC parameters are 1-based; Row fields are 0-based.
                    for (int i = 0; i < 6; i++) {
                        preparedStatement.setObject(i + 1, row.getField(i));
                    }
                },
                // Flush every 1000 rows or every 5 seconds, whichever comes first.
                // The default (batch size 5000, no interval) can leave low-volume
                // streams unflushed until a checkpoint.
                JdbcExecutionOptions.builder()
                        .withBatchSize(1000)
                        .withBatchIntervalMs(5000)
                        .withMaxRetries(3)
                        .build(),
                new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                        .withDriverName(ClickHouseDriver.class.getName())
                        .withUrl("jdbc:clickhouse://ip:端口号/test1?socket_timeout=600000")
                        .withUsername("root")
                        .withPassword("hexin")
                        .build()
        ));

        // Start the Flink job (blocks until the job terminates).
        env.execute("kafka-to-clickhouse");
    }
}