Flink side-output streams

This post shows how to use Apache Flink to consume data from Kafka and run a real-time ETL pipeline: records are filtered, transformed, joined against HBase, and written to HBase and Kafka. Checkpointing is enabled so records are processed at least once end to end, AsyncDataStream performs the asynchronous HBase lookup, and the job drives both a main stream and a side-output stream.
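The job below is built around Flink's side-output mechanism. As a minimal sketch of the pattern (the tag name, the routing condition and the input stream are illustrative, not taken from the job that follows): declare an OutputTag as an anonymous subclass so the generic type is preserved, emit to it with ctx.output() inside a ProcessFunction, and read it back with getSideOutput().

// minimal side-output sketch; the "retry" tag and the routing condition are illustrative only,
// and input is assumed to be an existing DataStream<String>
final OutputTag<String> retryTag = new OutputTag<String>("retry") {};

SingleOutputStreamOperator<String> mainStream = input.process(new ProcessFunction<String, String>() {
    @Override
    public void processElement(String value, Context ctx, Collector<String> out) {
        if (value.isEmpty()) {
            ctx.output(retryTag, value);   // route empty records to the side output
        } else {
            out.collect(value);            // everything else stays on the main stream
        }
    }
});

DataStream<String> retryStream = mainStream.getSideOutput(retryTag);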
package com.msxf.datamart.applyMain;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.serializer.SerializerFeature;
import com.msxf.datamart.common.SideOutEventType;
import com.msxf.datamart.cusBaseInfo.AppserverCusBaseInfoAsyncFunction;
import com.msxf.datamart.pojo.ApplyMainPojo;
import com.msxf.datamart.pojo.AppserverCusBaseInfoPojo;
import com.msxf.datamart.utils.KafkaPropertiesUtil;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer010;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;
import java.util.Properties;
import java.util.concurrent.TimeUnit;

public class ApplyMainApp {

    private static final Logger LOG = LoggerFactory.getLogger(ApplyMainApp.class);

    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 7) {
            throw new IllegalArgumentException(
                    "Invalid arguments; expected, in order: kafka brokers, topic, hbase config, kafka ssl user, kafka ssl password, consumer group id, side-output timeout (ms)");
        }

        String kafkaUrl = args[0];
        String topic = args[1];
        List<String> topics = KafkaPropertiesUtil.getTopics(topic);
        //HBase config passed to the async lookup function
        String datamart = args[2];
        //Kafka SSL username
        String userName = args[3];
        //Kafka SSL password
        String password = args[4];
        String sinkKafkaUrl = "datamart-kfk-mdc-01.middlware.msxf.lo:9092,datamart-kfk-mdc-02.middlware.msxf.lo:9092,datamart-kfk-mdc-03.middlware.msxf.lo:9092";
        String sinkU = "appodn";
        String sinkP = "czledm5pira4qugxhv06";
        //data-mart Kafka cluster (hosts the side-output topic)
        //String outSideKafkaUrl = kafkaUrl;
        //String outSideU = userName;
        //String outSideP = password;
        String outSideKafkaUrl = "datamart-kfk-mdc-01.middlware.msxf.lo:9092,datamart-kfk-mdc-02.middlware.msxf.lo:9092,datamart-kfk-mdc-03.middlware.msxf.lo:9092";
        String outSideU = "appodn";
        String outSideP = "czledm5pira4qugxhv06";
        String groupId = args[5];
        //retry window for side-output records, in milliseconds
        String timeDuration = args[6];
        if (args.length > 7) {
            //optional extra argument: reuse the source cluster and its credentials for the sink
            sinkKafkaUrl = kafkaUrl;
            sinkU = userName;
            sinkP = password;
        }

        System.setProperty("HADOOP_USER_NAME", "flink");
        // set up execution environment
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //Critical: enable checkpointing (120s interval) with exactly-once semantics
        env.enableCheckpointing(120000, CheckpointingMode.EXACTLY_ONCE);
        env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
        //checkpoint cleanup policy: retain externalized checkpoints when the job is cancelled
        CheckpointConfig checkpointConfig = env.getCheckpointConfig();
        checkpointConfig.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        checkpointConfig.setCheckpointTimeout(60000);

        Properties binlogProps = KafkaPropertiesUtil.buildProperties(kafkaUrl, groupId, userName, password);
        Properties sinkProps = KafkaPropertiesUtil.buildProperties(sinkKafkaUrl, groupId, sinkU, sinkP);
        Properties sideOutProps = KafkaPropertiesUtil.buildProperties(outSideKafkaUrl, groupId, outSideU, outSideP);
        Properties sideOutSinkProps = KafkaPropertiesUtil.buildProperties(outSideKafkaUrl, groupId, outSideU, outSideP);

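        //Kafka 0.10 consumers: one for the binlog topics and one for records previously re-queued to the side-output topic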
        FlinkKafkaConsumer010<String> binlogConsumer = new FlinkKafkaConsumer010<>(topics, new SimpleStringSchema(), binlogProps);
        FlinkKafkaConsumer010<String> sideOutConsumer = new FlinkKafkaConsumer010<>("datamart_tag_sideout", new SimpleStringSchema(), sideOutProps);
        //FlinkKafkaConsumer010<String> sideOutConsumer = new FlinkKafkaConsumer010<>("tianshu_sideout_online", new SimpleStringSchema(), sideOutProps);

        //parse the binlog and generate rows
        DataStreamSource<String> binlogSource = env.addSource(binlogConsumer);
        //convert side-output records back into rows, keeping only APPLY_MAIN events
        SingleOutputStreamOperator<String> sideSource = env.addSource(sideOutConsumer)
                .filter((FilterFunction<String>) value -> {
                    JSONObject json = JSON.parseObject(value);
                    String flag = json.getString("flag");
                    return SideOutEventType.APPLY_MAIN.getCode().equals(flag);
                });
        //union the two streams
        DataStream<String> source = binlogSource.union(sideSource);
        //async join against HBase: 10s timeout, at most 20 in-flight requests
        SingleOutputStreamOperator<String> sinkSource = AsyncDataStream.unorderedWait(
                source,
                new ApplyMainAsyncFunction(datamart),
                10000,
                TimeUnit.MILLISECONDS,
                20
        );

        //records that were not joined: while still inside the retry window they are written to the
        //side output for replay, otherwise they are converted and emitted on the main stream
        OutputTag<String> sideOut = new OutputTag<String>("sideOut") {
        }; //anonymous subclass so Flink can capture the generic type parameter
        SingleOutputStreamOperator<String> result = sinkSource.process(new ProcessFunction<String, String>() {
            @Override
            public void processElement(String value, Context ctx, Collector<String> out) throws Exception {
                JSONObject json = JSON.parseObject(value);
                String flag = json.getString("flag");
                String time = json.getString("time");
                if (SideOutEventType.APPLY_MAIN.getCode().equals(flag)) {
                    if (System.currentTimeMillis() - Long.parseLong(time) <= Integer.parseInt(timeDuration)) {
                        ctx.output(sideOut, value);
                    } else {
                        out.collect(convert(value));
                    }
                } else {
                    out.collect(value);
                }
            }
        });
        result.getSideOutput(sideOut)
                .addSink(new FlinkKafkaProducer010<>(
                        "datamart_tag_sideout",
                        new SimpleStringSchema(),
                        sideOutSinkProps)).name("sideOutSink");
        //main stream written to the Kafka sink
        result.addSink(new FlinkKafkaProducer010<>(
                "dwd_cdp_apply_main",
                new SimpleStringSchema(),
                sinkProps)).name("KafkaSink");
        env.execute("ApplyMainApp");

    }

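    /**
     * Maps a joined JSON record onto the ApplyMainPojo field layout and serializes it
     * back to JSON with null fields retained; if the input cannot be parsed it is
     * returned unchanged and a warning is logged.
     */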
    public static String convert(String input) {
        JSONObject json;
        try {
            json = JSON.parseObject(input);
            ApplyMainPojo pojo = new ApplyMainPojo();
            pojo.setEtl_record_type(json.getString("etl_record_type"));
            pojo.setEtl_time(json.getString("etl_time"));
            pojo.setUnique_id(json.getString("unique_id"));
            pojo.setUser_id(null);
            pojo.setCreate_time(json.getString("time_inst"));
            pojo.setUpdate_time(json.getString("time_upd"));
            pojo.setAppl_channel(json.getString("apply_channel"));
            pojo.setAppl_prod_cd(json.getString("product_code"));
            pojo.setReal_channel_appl(json.getString("cooperator_id"));
            pojo.setApply_no(json.getString("apply_no"));
            pojo.setApply_mode_enums(json.getString("apply_type"));
            String s = JSON.toJSONString(pojo, SerializerFeature.WriteMapNullValue);
            return s;
        } catch (JSONException e) {
            LOG.warn("ROW : {} convert error", input);
            return input;
        }
    }
}
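For reference, a job with this argument layout would typically be submitted through the standard Flink CLI. The jar name, broker addresses and other values below are placeholders chosen to match the argument order checked in main(), not values from the original deployment:

flink run -c com.msxf.datamart.applyMain.ApplyMainApp datamart-applymain.jar \
    broker1:9092,broker2:9092 binlog_topic hbase-zk:2181 ssl_user ssl_password apply_main_group 600000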
