使用Flink将CSV文件导入ClickHouse中

参考:https://github.com/ivi-ru/flink-clickhouse-sink

1.环境(在 pom.xml 中添加 flink-clickhouse-sink 的 Maven 依赖,另需引入 Flink 流处理相关依赖)

   <dependency>
      <groupId>ru.ivi.opensource</groupId>
      <artifactId>flink-clickhouse-sink</artifactId>
      <version>1.3.1</version>
    </dependency>

2.资源准备

2.1 数据准备(保存为 D:\weather.csv,首行为表头)

time,X,Y
2017-01-01,-154.45,65.55 
2017-01-01,-154.45,65.65 
2017-01-01,-154.45,65.75 
2017-01-01,-154.45,65.85 
2017-01-01,-154.45,65.95 
2017-01-01,-154.45,66.05 
2017-01-01,-154.35,48.05 
2017-01-01,-154.35,48.15 
2017-01-01,-154.35,48.25 
2017-01-01,-154.35,48.35 

2.2 建表语句

-- Target table for the CSV import; the columns mirror the CSV header (time, X, Y).
CREATE TABLE test.Weather
(
    `time` String,   -- date kept as text, exactly as it appears in the CSV
    `X` Float64,
    `Y` Float64
)
ENGINE = MergeTree
-- Partitioning and sorting both use the raw time string.
PARTITION BY time
ORDER BY time;

3.代码

3.1 Bean

/**
 * One record of the weather CSV ({@code time,X,Y}) together with its rendering as a
 * ClickHouse {@code VALUES} tuple.
 */
public class Weather {
    /** Value of the {@code time} column, kept as text. */
    public String time;
    /** Value of the {@code X} column. */
    public float X;
    /** Value of the {@code Y} column. */
    public float Y;

    /**
     * Creates a record from already parsed column values.
     *
     * @param time value of the {@code time} column
     * @param x    value of the {@code X} column
     * @param y    value of the {@code Y} column
     */
    public Weather(String time, float x, float y) {
        this.time = time;
        X = x;
        Y = y;
    }

    /**
     * Static factory equivalent to the constructor.
     *
     * @param time value of the {@code time} column
     * @param x    value of the {@code X} column
     * @param y    value of the {@code Y} column
     * @return a new record holding the given values
     */
    public static Weather of(String time, float x, float y) {
        return new Weather(time, x, y);
    }

    /**
     * Renders a record as a parenthesised value tuple, e.g.
     * {@code ('2017-01-01', -154.45, 65.55 )}.
     *
     * <p>The {@code time} value is emitted as a single-quoted string literal with
     * backslashes and single quotes escaped; an unquoted {@code 2017-01-01} would not be
     * read as a string by the database.
     *
     * @param weather record to render; must not be {@code null}
     * @return the tuple text
     */
    public static String convertToCsv(Weather weather) {
        StringBuilder builder = new StringBuilder();
        builder.append("(");

        // time is a String column, so it has to be a quoted literal
        appendStringLiteral(builder, weather.time);
        builder.append(", ");

        builder.append(weather.X);
        builder.append(", ");

        builder.append(weather.Y);

        builder.append(" )");
        return builder.toString();
    }

    /** Appends {@code value} as a single-quoted, backslash-escaped literal ({@code NULL} for null). */
    private static void appendStringLiteral(StringBuilder out, String value) {
        if (value == null) {
            out.append("NULL");
            return;
        }
        out.append('\'');
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            if (c == '\\' || c == '\'') {
                out.append('\\');
            }
            out.append(c);
        }
        out.append('\'');
    }
}

3.2 ClickHouseSink

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import ru.ivi.opensource.flinkclickhousesink.ClickHouseSink;
import ru.ivi.opensource.flinkclickhousesink.model.ClickHouseClusterSettings;
import ru.ivi.opensource.flinkclickhousesink.model.ClickHouseSinkConst;

import java.util.HashMap;
import java.util.Properties;

/**
 * Flink job that reads a local CSV file and writes its rows to ClickHouse through
 * flink-clickhouse-sink.
 *
 * <p>Original header: program Code_Project, author 77, created 2021-08-16 09:50, version 1.0.
 */
public class Weather_Data_Sink {
    /** Header line of the input CSV; it carries no data and must not be parsed. */
    private static final String CSV_HEADER = "time,X,Y";

    /**
     * Configures the sink, builds the file -> filter -> map -> sink pipeline and runs it.
     *
     * @param args unused
     * @throws Exception if the job fails
     */
    public static void main(String[] args) throws Exception {
        // Create the streaming execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        HashMap<String, String> parameters = new HashMap<>();

        // ClickHouse connection settings
        // Host and port
        parameters.put(ClickHouseClusterSettings.CLICKHOUSE_HOSTS, "http://node04:8123/");
        // User name
        parameters.put(ClickHouseClusterSettings.CLICKHOUSE_USER, "");
        // Password
        parameters.put(ClickHouseClusterSettings.CLICKHOUSE_PASSWORD, "");

        // Common sink settings
        // Interval, in seconds, of the periodic check task running in the sink's thread pool
        parameters.put(ClickHouseSinkConst.TIMEOUT_SEC, "1");
        // Directory where records of failed sends are stored
        parameters.put(ClickHouseSinkConst.FAILED_RECORDS_PATH, "d:/");
        // Number of writers
        parameters.put(ClickHouseSinkConst.NUM_WRITERS, "2");
        // Maximum number of retries
        parameters.put(ClickHouseSinkConst.NUM_RETRIES, "2");
        // Maximum queue capacity
        parameters.put(ClickHouseSinkConst.QUEUE_MAX_CAPACITY, "2");
        // Controls whether ClickHouse send errors are raised in the main thread
        // (the thread calling ClickHouseSink::invoke):
        //   true  - send errors are ignored; failed data is written to disk.
        //   false - send errors are thrown in the main thread; data is written to disk as well.
        // The disk location is the failed-records path configured above.
        parameters.put(ClickHouseSinkConst.IGNORING_CLICKHOUSE_SENDING_EXCEPTION_ENABLED, "false");

        // The sink reads its configuration from the global job parameters.
        ParameterTool parameterTool = ParameterTool.fromMap(parameters);
        env.getConfig().setGlobalJobParameters(parameterTool);
        // Job parallelism
        env.setParallelism(1);

        String filePath = "D:\\weather.csv";

        // Source: one element per line of the file
        DataStreamSource<String> inputStream = env.readTextFile(filePath);
        SingleOutputStreamOperator<String> dataStream = inputStream
                // The header line and blank lines would fail numeric parsing, so drop them first.
                .filter(Weather_Data_Sink::isDataLine)
                .map(new MapFunction<String, String>() {
                    @Override
                    public String map(String data) throws Exception {
                        return toInsertValues(data);
                    }
                });

        // Sink properties
        Properties props = new Properties();
        // Target table; the name must match the created table exactly (test.Weather).
        props.put(ClickHouseSinkConst.TARGET_TABLE_NAME, "test.Weather");
        // Maximum buffer size of the sink
        props.put(ClickHouseSinkConst.MAX_BUFFER_SIZE, "10000");
        ClickHouseSink sink = new ClickHouseSink(props);
        dataStream.addSink(sink);
        dataStream.print();
        env.execute("test");

    }

    /** Returns true for lines that carry a record, i.e. neither blank nor the CSV header. */
    private static boolean isDataLine(String line) {
        String trimmed = line.trim();
        return !trimmed.isEmpty() && !trimmed.equalsIgnoreCase(CSV_HEADER);
    }

    /** Parses one {@code time,X,Y} line and renders it as the value tuple expected by the sink. */
    private static String toInsertValues(String line) {
        String[] fields = line.split(",");
        if (fields.length < 3) {
            throw new IllegalArgumentException("Expected 3 comma-separated fields but got "
                    + fields.length + " in line: " + line);
        }
        try {
            Weather weather = Weather.of(fields[0].trim(),
                    Float.parseFloat(fields[1]),
                    Float.parseFloat(fields[2]));
            return Weather.convertToCsv(weather);
        } catch (NumberFormatException e) {
            // Keep the cause and name the offending line so bad input is easy to locate.
            throw new IllegalArgumentException("Non-numeric X/Y value in line: " + line, e);
        }
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值