Flink消费Kafka插入ClickHouse

7 篇文章 0 订阅
3 篇文章 0 订阅

Flink消费Kafka插入ClickHouse


一、Maven依赖

        <!-- Flink client library (Scala 2.11 build) -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>1.13.1</version>
        </dependency>

        <!-- Flink core Java API -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.13.1</version>
        </dependency>

        <!-- Flink DataStream API (Scala 2.11 build) -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>1.13.1</version>
        </dependency>

        <!-- Kafka client, pinned explicitly -->
        <!-- NOTE(review): flink-connector-kafka brings its own kafka-clients transitively; confirm 3.1.0 is compatible with connector 1.13.1 -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>3.1.0</version>
        </dependency>

        <!-- Flink Kafka connector (Scala 2.11 build) -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.11</artifactId>
            <version>1.13.1</version>
        </dependency>

        <!-- SLF4J simple logging binding -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
            <version>1.7.36</version>
        </dependency>

        <!-- JSON parsing -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>2.0.3</version>
        </dependency>

        <!-- ClickHouse JDBC driver -->
        <dependency>
            <groupId>ru.yandex.clickhouse</groupId>
            <artifactId>clickhouse-jdbc</artifactId>
            <version>0.3.2</version>
        </dependency>

二、Job类

 /**
  * Entry point: builds a Kafka source, pipes it through {@code KafkaSourceFlatMap} and
  * {@code ClickHouseSinkFlatMap}, and executes the job under the name
  * "KafkaToClickHouseJob".
  *
  * <p>Any exception thrown while building or executing the job is logged and swallowed;
  * nothing propagates to the caller.
  *
  * @param args command-line arguments; not read
  */
 public static void main(String[] args) {
        try {
            /** 1. Kafka source properties */
            Properties properties = new Properties();

            // This client is a consumer, so use the ConsumerConfig constant
            // (it resolves to the same "bootstrap.servers" key as the ProducerConfig one).
            properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, BROKERS);
            properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, GROUP_ID);
            // Interval for detecting topic partition changes.
            // KafkaSource reads "partition.discovery.interval.ms"; the
            // "flink.partition-discovery.interval-millis" key is only honoured by the legacy
            // FlinkKafkaConsumer and would leave discovery disabled here.
            // Stored as a String because Properties.getProperty returns null for
            // non-String values.
            properties.setProperty("partition.discovery.interval.ms", String.valueOf(INTERVAL_MILLIS));
            // NOTE(review): "refresh.leader.backoff.ms" looks like a legacy consumer setting;
            // confirm the Kafka client in use still reads it.
            properties.put("refresh.leader.backoff.ms", REFRESH_MS);

            /** 2. Kafka source: value-only String records, starting from the latest offsets */
            KafkaSource<String> kafkaSource = KafkaSource.<String>builder()
                    .setTopics(TOPIC)
                    .setValueOnlyDeserializer(new SimpleStringSchema())
                    .setProperties(properties)
                    .setStartingOffsets(OffsetsInitializer.latest())
                    .build();

            /** 3. Create the Flink streaming execution environment */
            final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            // Checkpoint every 120 s in exactly-once mode, with a 180 s minimum pause between
            // checkpoints.
            env.enableCheckpointing(120000L, CheckpointingMode.EXACTLY_ONCE);
            env.getCheckpointConfig().setMinPauseBetweenCheckpoints(180000L);
            // The pipeline uses no watermarks (see noWatermarks() below), so the auto
            // watermark interval is set to 0.
            env.getConfig().setAutoWatermarkInterval(0);

            /** 4. Consume the source; every operator runs with parallelism 1 */
            env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "kafkaSource_sourceName")
                    .uid("kafkaSource_sourceName_uid").name("kafkaSource_sourceName_name").setParallelism(1)

                    .flatMap(new KafkaSourceFlatMap()).uid("KafkaSourceFlatMap_uid").name("KafkaSourceFlatMap_name").setParallelism(1)
                    .flatMap(new ClickHouseSinkFlatMap(Constants.PARALL)).uid("ClickHouseSinkFlatMap_uid").name("ClickHouseSinkFlatMap_name").setParallelism(1);


            env.execute("KafkaToClickHouseJob");
        } catch (Exception e) {
            LOG.error("KafkaToClickHouseJob启动失败!", e);
        }
    }

二、Kafka FlatMap算子

public class KafkaSourceFlatMap extends RichFlatMapFunction<String, Map<String, String>> {
    private static final Logger log = LoggerFactory.getLogger(KafkaSourceFlatMap.class);

    @Override
    public void flatMap(String str, Collector<Map<String, String>> out) {
        try {
            //CK字段初始化
            String user_id;                                 
            String event_type;                              
            String label_value;                             
            String recv_date;
            String recv_time;                               
			//清洗逻辑...
			
            out.collect(listToMap(user_id.trim(),
                    event_type.trim(),
                    label_value.trim(),
                    recv_time));

    }
    
    public Map<String, String> listToMap(String... str) {
        Map<String, String> resMap = new HashMap<>();
        for (int i = 0; i < str.length; i++) {
            resMap.put((i + 1) + Constants.NULL, str[i]);
        }
        return resMap;
    }
} 

三、Clickhouse FlatMap算子

/**
 * Flink flatMap operator used as a sink: it binds each incoming positional map to
 * {@code Constants.INSERT_SQL}, accumulates rows in a JDBC batch, and executes the batch
 * once {@code batch_size} rows have been added. Nothing is emitted to the collector.
 *
 * <p>Errors are logged and do not fail the job (best effort). The batch is flushed only on
 * reaching {@code batch_size} and on {@link #close()}, not on checkpoints, so rows buffered
 * since the last flush can be lost if the task fails.
 */
public class ClickHouseSinkFlatMap extends RichFlatMapFunction<Map<String, String>, String> {

    private static final Logger LOG = LoggerFactory.getLogger(ClickHouseSinkFlatMap.class);
    private final int batch_size;
    // Rows added to the current, not-yet-executed batch.
    private int count = 0;
    // Rows successfully inserted so far by this instance.
    private int total = 0;
    // JDBC handles are created lazily on the worker and must not take part in Java
    // serialization of the function instance, hence transient.
    private transient ClickHouseConnection connection = null;
    private transient PreparedStatement preparedStatement = null;

    /**
     * @param batch_size number of rows to accumulate before executing the batch
     */
    public ClickHouseSinkFlatMap(int batch_size) {
        this.batch_size = batch_size;
    }

    /**
     * Adds one row to the pending batch and flushes when the batch is full.
     *
     * @param map       positional column values; key {@code i + Constants.NULL} holds the value
     *                  for JDBC parameter {@code i} (1-based)
     * @param collector unused; this operator produces no output
     */
    @Override
    public void flatMap(Map<String, String> map, Collector<String> collector) {
        try {
            ensureStatement();

            for (int i = 1; i <= map.size(); i++) {
                preparedStatement.setString(i, map.get(i + Constants.NULL));
            }

            preparedStatement.addBatch();

            count = count + 1;
            try {
                if (count >= batch_size) {
                    flush();
                }
            } catch (Exception ee) {
                // count and the batch are left untouched, so the next record retries the flush.
                LOG.error("数据插入clickhouse 报错:", ee);
            }
        } catch (Exception ex) {
            LOG.error("ClickhouseSink插入报错====", ex);
        }
    }

    /**
     * Flushes any rows still pending and releases the statement and connection.
     * Without this, a final batch smaller than {@code batch_size} would never be written.
     */
    @Override
    public void close() throws Exception {
        try {
            if (preparedStatement != null && count > 0) {
                flush();
            }
        } catch (Exception e) {
            LOG.error("Failed to flush the remaining ClickHouse batch on close", e);
        } finally {
            if (preparedStatement != null) {
                try {
                    preparedStatement.close();
                } catch (SQLException e) {
                    LOG.error("Failed to close the ClickHouse statement", e);
                }
                preparedStatement = null;
            }
            if (connection != null) {
                try {
                    connection.close();
                } catch (SQLException e) {
                    LOG.error("Failed to close the ClickHouse connection", e);
                }
                connection = null;
            }
            super.close();
        }
    }

    /**
     * Lazily creates the connection and the prepared statement. Each is checked separately
     * so a failed prepareStatement is retried on the next record instead of leaving a
     * permanently null statement behind a non-null connection.
     */
    private void ensureStatement() throws SQLException {
        if (connection == null) {
            connection = getConn();
            if (connection == null) {
                throw new SQLException("ClickHouse connection is unavailable");
            }
        }
        if (preparedStatement == null) {
            preparedStatement = connection.prepareStatement(Constants.INSERT_SQL);
        }
    }

    /** Executes the pending batch, updates the running total, and resets the batch state. */
    private void flush() throws SQLException {
        preparedStatement.executeBatch();

        total += count;
        LOG.info("已经成功插入{}条数据!!!",total);

        preparedStatement.clearBatch();
        count = 0;
    }

    /**
     * Opens a ClickHouse connection from the settings in {@code Constants}, with a
     * 600000 ms socket timeout and up to 3 retries.
     *
     * @return the connection, or {@code null} if it could not be opened (the cause is logged)
     */
    public static ClickHouseConnection getConn() {

        int socketTimeout = 600000;
        ClickHouseProperties properties = new ClickHouseProperties();
        properties.setUser(Constants.USERNAME);
        properties.setPassword(Constants.PASSWORD);
        properties.setDatabase(Constants.DATABASE);
        properties.setMaxRetries(3);
        properties.setSocketTimeout(socketTimeout);
        ClickHouseDataSource clickHouseDataSource = new ClickHouseDataSource(Constants.URL, properties);
        try {
            return clickHouseDataSource.getConnection();
        } catch (SQLException e) {
            // Previously swallowed silently; keep the null return but record the cause.
            LOG.error("Failed to open ClickHouse connection", e);
        }
        return null;
    }

}

四、Clickhouse建表

-- Local table: created on every node of cluster "default".
-- It lives in database "test" so that it is the table the distributed table below
-- is defined from and routes to.
CREATE TABLE IF NOT EXISTS test.user_tag_local ON CLUSTER default
(
    user_id String,
    event_type String,
    label_value String,
    recv_date Date,
    recv_time DateTime64(3, 'Asia/Istanbul')
)
ENGINE = MergeTree()
PARTITION BY recv_date
ORDER BY recv_time;


-- Distributed table: same structure as test.user_tag_local, sharded by rand()
CREATE TABLE IF NOT EXISTS test.user_tag ON CLUSTER default
AS test.user_tag_local
Engine = Distributed("default" , "test" , "user_tag_local" , rand());

  • 0
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值