Flink ClickHouse Sink

1. Requirements

Requirement: store some detail-level data in ClickHouse so that events can be searched online. To do this, develop a ClickHouse sink for Flink.

Targets two Flink versions: 1.10 and 1.12.

ClickHouse's Java API is itself JDBC-based, so the sink follows the pattern of Flink's JDBCSinkFunction: a sink class that extends RichSinkFunction, plus a JDBCOutputFormat that performs the actual writes to ClickHouse.
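Since the ClickHouse driver is plain JDBC, connectivity can be sanity-checked outside Flink first. The following is only a minimal sketch, assuming the ru.yandex.clickhouse clickhouse-jdbc driver is on the classpath; the host, database, table and placeholder count simply reuse the values from the demo in section 2.3.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

public class ClickHouseJdbcSmokeTest {
    public static void main(String[] args) throws Exception {
        // Same driver class and JDBC URL format that the Flink sink uses later.
        Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
        try (Connection conn = DriverManager.getConnection("jdbc:clickhouse://192.168.7.123:8123/default");
             PreparedStatement ps = conn.prepareStatement("insert into t_rsd_incident values(?,?,?,?,?,?,?)")) {
            // Bind the seven parameters and call ps.execute(), exactly as with any other JDBC data source.
        }
    }
}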

2. Code

2.1 JDBCSinkFunction code

package com.rongan.realtime.sink;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
public class JDBCSinkFunction<T> extends RichSinkFunction<T> implements CheckpointedFunction {

    private final RAJDBCOutputFormat<T> outputFormat;

    public JDBCSinkFunction(RAJDBCOutputFormat<T> outputFormat) {
        this.outputFormat = outputFormat;
    }

    @Override
    public void snapshotState(FunctionSnapshotContext context) {
        // Flush the pending batch on every checkpoint so buffered records are not lost.
        outputFormat.flush();
    }

    @Override
    public void initializeState(FunctionInitializationContext context) {
        // No state to restore: the output format re-creates its connection in open().
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        // Hand the runtime context to the output format and open the JDBC connection.
        RuntimeContext ctx = getRuntimeContext();
        outputFormat.setRuntimeContext(ctx);
        outputFormat.open(ctx.getIndexOfThisSubtask(), ctx.getNumberOfParallelSubtasks());
    }

    @Override
    public void close() throws Exception {
        outputFormat.close();
        super.close();
    }

    @Override
    public void invoke(T value, Context context) throws Exception {
        // Delegate each record to the output format, which batches and writes it.
        outputFormat.writeRecord(value);
    }
}
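Two things trigger a flush in this sink: writeRecord() once batchInterval records have accumulated, and snapshotState() at every checkpoint. Checkpointing therefore also acts as a time-based flush, so rows buffered below the batch threshold still reach ClickHouse, and delivery is at-least-once on restart. A minimal sketch of enabling checkpointing (the 60-second interval is an illustrative value, not taken from the original job):

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class CheckpointingSketch {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Each checkpoint calls JDBCSinkFunction.snapshotState(), which flushes the pending batch.
        env.enableCheckpointing(60_000);
    }
}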

2.2 RAJDBCOutputFormat code

package com.rongan.realtime.sink;

import org.apache.flink.api.java.io.jdbc.AbstractJDBCOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.lang.reflect.Field;
import java.sql.PreparedStatement;
import java.sql.SQLException;


public class RAJDBCOutputFormat<T> extends AbstractJDBCOutputFormat<T> {

    private static final long serialVersionUID = 1L;

    private static final Logger LOG = LoggerFactory.getLogger(RAJDBCOutputFormat.class);

    private final String query;
    private final int batchInterval;

    private PreparedStatement upload;
    private int batchCount = 0;

    public RAJDBCOutputFormat(String username, String password, String drivername,
                              String dbURL, String query, int batchInterval) {
        super(username, password, drivername, dbURL);
        this.query = query;
        this.batchInterval = batchInterval;
    }

    /**
     * Connects to the target database and initializes the prepared statement.
     *
     * @param taskNumber The number of the parallel instance.
     * @throws IOException Thrown, if the output could not be opened due to an
     *                     I/O problem.
     */
    @Override
    public void open(int taskNumber, int numTasks) throws IOException {
        try {
            establishConnection();
            upload = connection.prepareStatement(query);
        } catch (SQLException sqe) {
            throw new IllegalArgumentException("open() failed.", sqe);
        } catch (ClassNotFoundException cnfe) {
            throw new IllegalArgumentException("JDBC driver class not found.", cnfe);
        }
    }

    @Override
    public void writeRecord(T row) throws IOException {
        try {
            setRecordToStatement(upload, row);
            upload.addBatch();
        } catch (SQLException e) {
            throw new RuntimeException("Preparation of JDBC statement failed.", e);
        }

        batchCount++;

        if (batchCount >= batchInterval) {
            // execute batch
            flush();
        }
    }

    /**
     * Binds every declared field of the record to the prepared statement, in
     * field declaration order, so the bean layout must match the placeholder
     * order of the INSERT statement.
     */
    private void setRecordToStatement(PreparedStatement ps, T obj) {
        Field[] fields = obj.getClass().getDeclaredFields();
        for (int i = 0; i < fields.length; i++) {
            Field field = fields[i];
            // Make private fields accessible for reflective reads.
            field.setAccessible(true);
            try {
                // Read the field value and bind it to the (i + 1)-th placeholder.
                Object o = field.get(obj);
                ps.setObject(i + 1, o);
            } catch (Exception e) {
                throw new RuntimeException("Failed to read field " + field.getName() + " reflectively.", e);
            }
        }
    }

    void flush() {
        try {
            upload.executeBatch();
            batchCount = 0;
        } catch (SQLException e) {
            throw new RuntimeException("Execution of JDBC statement failed.", e);
        }
    }

    int[] getTypesArray() {
        return null;
    }

    /**
     * Executes prepared statement and closes all resources of this instance.
     *
     * @throws IOException Thrown, if the output could not be closed properly.
     */
    @Override
    public void close() throws IOException {
        if (upload != null) {
            flush();
            try {
                upload.close();
            } catch (SQLException e) {
                LOG.info("JDBC statement could not be closed: " + e.getMessage());
            } finally {
                upload = null;
            }
        }

        closeDbConnection();
    }
}
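setRecordToStatement() binds fields purely by their declaration order in the record class, so the record type must be a flat bean whose fields line up one-to-one with the placeholders of the INSERT statement. The real com.rongan.realtime.bean.Incident is not shown in this post; a hypothetical Java equivalent, matching the t_rsd_incident table used in the demo, could look like this (for the Scala case class in section 2.3 the constructor parameters play the same role):

// Hypothetical bean: field declaration order must match
// "insert into t_rsd_incident values(?,?,?,?,?,?,?)".
public class Incident implements java.io.Serializable {
    private String id;              // placeholder 1
    private String srcIp;           // placeholder 2
    private String destIp;          // placeholder 3
    private String srcNetworkArea;  // placeholder 4
    private String destNetworkArea; // placeholder 5
    private String incidentTime;    // placeholder 6
    private String date;            // placeholder 7 (partition column)

    public Incident() {
    }

    public Incident(String id, String srcIp, String destIp, String srcNetworkArea,
                    String destNetworkArea, String incidentTime, String date) {
        this.id = id;
        this.srcIp = srcIp;
        this.destIp = destIp;
        this.srcNetworkArea = srcNetworkArea;
        this.destNetworkArea = destNetworkArea;
        this.incidentTime = incidentTime;
        this.date = date;
    }
}

Note that Class.getDeclaredFields() does not formally guarantee declaration order; the sink relies on the common JVM behavior of returning fields in source order.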

 

2.3 Demo: read data from Kafka and insert it into ClickHouse

ClickHouse table DDL:

create table t_rsd_incident (
    id String,
    src_ip String,
    dest_ip String,
    src_network_area String,
    dest_network_area String,
    incident_time String,
    date String
) engine = MergeTree
partition by date
primary key (id)
order by (id)

Demo:

package com.rongan.realtime.demo

import java.util.{Date, Properties}
import com.rongan.realtime.bean.Incident
import com.rongan.realtime.sink.{JDBCSinkFunction, RAJDBCOutputFormat}
import com.rongan.realtime.util.{DateUtils, FlinkUtil}
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011
import org.apache.flink.table.api.{Table, TableSchema}
import org.apache.flink.table.api.scala._
import org.apache.flink.types.Row
import org.apache.kafka.clients.consumer.ConsumerConfig

object WordCount2 {
  def main(args: Array[String]): Unit = {
    //1. Create the Flink execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI()
    val tableEnv: StreamTableEnvironment = FlinkUtil.initTableEnv(env)

    //2. Create the Kafka source
    val props = new Properties()
    props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.7.128:9092")
    props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "TEST")

    val consumer = new FlinkKafkaConsumer011[String]("demo1", new SimpleStringSchema(), props)

    val kafkaDataStream: DataStream[String] = env.addSource(consumer).setParallelism(3) // Kafka source stream with parallelism 3
    //3. Register the Kafka stream as a table
    //    kafkaDataStream.print()
    tableEnv.createTemporaryView("kafkaStream", kafkaDataStream, 'message)

    //4. Run the SQL query
    val table: Table = tableEnv.sqlQuery(
      "select message as id, '192.168.1.1' as srcIp, '192.168.1.1' as destIp, 'bsdf' as srcNetworkArea, 'sfse' as destNetworkArea, " +
        "DATE_FORMAT(LOCALTIMESTAMP, 'yyyy-MM-dd HH:mm:ss') as incidentTime from kafkaStream")
    //5. Convert to the bean class that matches the ClickHouse table
    val incidentStream: DataStream[Incident] = tableEnv.toAppendStream[Row](table)
      .map(row => {
        val id: String = row.getField(0).asInstanceOf[String]
        val srcIp: String = row.getField(1).asInstanceOf[String]
        val destIp: String = row.getField(2).asInstanceOf[String]
        val srcNetworkArea: String = row.getField(3).asInstanceOf[String]
        val destNetworkArea: String = row.getField(4).asInstanceOf[String]
        val incidentTime: String = row.getField(5).asInstanceOf[String]
        new Incident(id, srcIp, destIp, srcNetworkArea, destNetworkArea, incidentTime, DateUtils.targetFormat(new Date(), DateUtils.YYYY_MM_DD))
      })

    //6. Create the ClickHouse sink function
    val format = new RAJDBCOutputFormat[Incident]("", "", "ru.yandex.clickhouse.ClickHouseDriver", "jdbc:clickhouse://192.168.7.123:8123/default"
      , "insert into t_rsd_incident values(?,?,?,?,?,?,?)", 1)
    val functionSinkFunction = new JDBCSinkFunction[Incident](format)
    
    incidentStream.addSink(functionSinkFunction)

    //7. Execute the job (env.execute must be called exactly once)
    env.execute("job")
  }
}

// Bean matching the t_rsd_incident table; field declaration order matches the INSERT placeholders
case class Incident(id: String, srcIp: String, destIp: String, srcNetworkArea: String, destNetworkArea: String, incidentTime: String, date: String)

 
