Flink: Splitting a Stream with ProcessFunction (Side Output)

package cn.edu.tju.demo;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

public class Test29 {
    private static String HOST_NAME = "xx.xx.xx.xx";
    private static int PORT = 9999;
    private static String DELIMITER ="\n";
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment environment = StreamExecutionEnvironment
                .getExecutionEnvironment();
        //environment.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);



        DataStream<String> socketDataInfo =  environment.socketTextStream(HOST_NAME, PORT, DELIMITER);
        SingleOutputStreamOperator<DataInfo> dataInfoStream = socketDataInfo.map(new MapFunction<String, DataInfo>() {
            @Override
            public DataInfo map(String value) throws Exception {

                String[] stringList = value.split(",");
                DataInfo dataInfo = new DataInfo(Long.parseLong(
                        stringList[0]), stringList[1], Double.parseDouble(stringList[2]));
                return dataInfo;
            }
        });

        OutputTag<DataInfo> myOutputTag = new OutputTag<DataInfo>("low"){};
        SingleOutputStreamOperator<DataInfo> resultStream = dataInfoStream.process(new ProcessFunction<DataInfo, DataInfo>() {

            @Override
            public void processElement(DataInfo value, ProcessFunction<DataInfo, DataInfo>.Context ctx, Collector<DataInfo> out) throws Exception {
                //records with val greater than 10 go to the main stream (resultStream); all others go to the side output "low"
                if (value.getVal() > 10) {

                    out.collect(value);
                } else {
                    ctx.output(myOutputTag, value);
                }
            }
        });


        resultStream.print("result");
        resultStream.getSideOutput(myOutputTag).print("side");
        environment.execute("my job");
    }



    public static class DataInfo{
        private long ts;
        private String info;
        private double val;

        public long getTs() {
            return ts;
        }

        public void setTs(long ts) {
            this.ts = ts;
        }

        public String getInfo() {
            return info;
        }

        public void setInfo(String info) {
            this.info = info;
        }

        public double getVal() {
            return val;
        }

        public void setVal(double val) {
            this.val = val;
        }

        @Override
        public String toString() {
            return "DataInfo{" +
                    "ts=" + ts +
                    ", info='" + info + '\'' +
                    ", val='" + val + '\'' +
                    '}';
        }

        public DataInfo(long ts, String info, double val) {
            this.ts = ts;
            this.info = info;
            this.val = val;
        }

        public DataInfo() {

        }
    }
}
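
To try the job, start a socket server on the configured host (for example with nc -lk 9999) and send comma-separated lines in the form ts,info,val. With the threshold of 10 used in processElement, records split as shown below; the sample values are illustrative, and the console prefixes may also carry a subtask index such as result:3> depending on parallelism:

```
# lines typed into the socket
1690000000,sensor-1,12.5
1690000001,sensor-2,3.2

# printed by the job
result> DataInfo{ts=1690000000, info='sensor-1', val=12.5}
side> DataInfo{ts=1690000001, info='sensor-2', val=3.2}
```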

In Flink, ProcessFunction can be used to split a stream into several side outputs. By extending ProcessFunction<JSONObject, JSONObject> you can process each record and route it to the output it belongs to:

```java
public class MyProcessFunction extends ProcessFunction<JSONObject, JSONObject> {
    @Override
    public void processElement(JSONObject value, Context ctx, Collector<JSONObject> out) throws Exception {
        // Inspect each record and route it to the matching side output.
        if (value.containsKey("type") && value.getString("type").equals("A")) {
            ctx.output(new OutputTag<JSONObject>("output-A"){}, value);
        } else if (value.containsKey("type") && value.getString("type").equals("B")) {
            ctx.output(new OutputTag<JSONObject>("output-B"){}, value);
        } else {
            ctx.output(new OutputTag<JSONObject>("output-other"){}, value);
        }
    }
}
```

Here MyProcessFunction overrides processElement: records whose type field is "A" are sent to the side output named "output-A", records with type "B" go to "output-B", and everything else goes to "output-other". The side outputs themselves are retrieved and handled in the surrounding Flink program, as sketched below. This is only a simple example; adapt the conditions and outputs to your own business logic.
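
To consume those three outputs, the downstream program retrieves them from the operator that runs MyProcessFunction. Below is a minimal sketch of that wiring, assuming JSONObject is com.alibaba.fastjson.JSONObject and that jsonStream is an existing DataStream<JSONObject> in the job's main method; both names are illustrative, not from the original code. getSideOutput matches tags by their id, so fresh OutputTag instances carrying the same names as those used inside MyProcessFunction retrieve the corresponding streams:

```java
// Tags with the same ids as the ones emitted inside MyProcessFunction.
OutputTag<JSONObject> tagA = new OutputTag<JSONObject>("output-A") {};
OutputTag<JSONObject> tagB = new OutputTag<JSONObject>("output-B") {};
OutputTag<JSONObject> tagOther = new OutputTag<JSONObject>("output-other") {};

// jsonStream is assumed to be a DataStream<JSONObject> built elsewhere in the job.
SingleOutputStreamOperator<JSONObject> processed = jsonStream.process(new MyProcessFunction());

// Each side output is an ordinary DataStream that can be printed, transformed, or written to a sink.
processed.getSideOutput(tagA).print("A");
processed.getSideOutput(tagB).print("B");
processed.getSideOutput(tagOther).print("other");
```

In practice it is cleaner to declare each OutputTag once (for example as a static final field) and reference the same instance both inside processElement and in the getSideOutput calls, rather than creating new anonymous tags per element.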
