Flink Stream Processing State Management

1. This is an ordinary Flink streaming job: it reads data from Kafka and prints the output.
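The job assumes comma-separated messages on the user_data topic, five fields each, with a record-type tag (f1/f2/f3) in the last field. For illustration only (the sample values below are made up), the three record shapes look like this:

u001,alice,1995,F,f1                    <- user profile: userId,uname,birthday,sex,f1
u001,c12,p88,2020-01-01 10:00:00,f2     <- browse event: uid,categoryId,productId,time,f2
u001,p88,59.9,2020-01-01 10:05:00,f3    <- order event: userId,productId,amount,time,f3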

package com.flink.java.project;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.Collector;
import org.apache.kafka.clients.consumer.ConsumerConfig;

import java.util.Iterator;
import java.util.Properties;


public class FlinkUserJob {

    public static void main(String[] args) throws Exception {
        // get the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // set the parallelism
        env.setParallelism(3);
        // checkpoint settings (commented out in this first version; enabled in v2 below)
//        env.enableCheckpointing(10000);
//        // use exactly-once checkpointing semantics
//        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
//        // minimum pause between checkpoints
//        env.getCheckpointConfig().setMinPauseBetweenCheckpoints(10000);
//        // a checkpoint must complete within 10 seconds or it is discarded (checkpoint timeout)
//        env.getCheckpointConfig().setCheckpointTimeout(10000);
//        // allow only one checkpoint at a time
//        env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
//        // retain checkpoint data when the job is cancelled
//        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
//        // set the state backend
//        env.setStateBackend(new FsStateBackend("file:///D:\\YYYY"));


        // Kafka consumer properties
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.10.15:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "flink-k2k_01");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        FlinkKafkaConsumer011<String> kafkaConsumer011 = new FlinkKafkaConsumer011<>("user_data", new SimpleStringSchema(), props);

        // start consuming from the latest offsets
        kafkaConsumer011.setStartFromLatest();
        // add the Kafka source
        DataStreamSource<String> userSource = env.addSource(kafkaConsumer011).setParallelism(1);

        // parse each comma-separated message into a Tuple5
        userSource.map(new MapFunction<String, Tuple5<String, String, String, String, String>>() {
            @Override
            public Tuple5<String,String,String,String,String> map(String str) throws Exception {
                String[] split = str.split(",");
                return new Tuple5<>(split[0],split[1],split[2],split[3],split[4]);
            }
        }).keyBy(0)       // key by userId (field 0)
          .countWindow(3) // tumbling count window: fire every 3 records per key
          .apply(new WindowFunction<Tuple5<String, String, String, String, String>, Object, Tuple, GlobalWindow>() {
            @Override
            public void apply(Tuple tuple, GlobalWindow window, Iterable<Tuple5<String, String, String, String, String>> input, Collector<Object> out) {
                Iterator<Tuple5<String, String, String, String, String>> iterator = input.iterator();
                UserInfo user = new UserInfo();
                // merge the records in this window into one UserInfo
                while (iterator.hasNext()) {
                    Tuple5<String, String, String, String, String> tp = iterator.next();
                    // dispatch on the record-type tag carried in the last field
                    switch (tp.f4) {
                        // record format: userId, uname, birthday, sex, f1
                        case "f1":
                            user.setUserID(tp.f0);
                            user.setName(tp.f1);
                            user.setAge(tp.f2);
                            user.setSex(tp.f3);
                            break;
                        // record format: uid, categoryId, productId, time, f2
                        case "f2":
                            user.setPro(tp.f2);
                            break;
                        // record format: userId, productId, amount, time, f3
                        case "f3":
                            user.setMoney(tp.f2);
                            user.setTime(tp.f3);
                            break;
                    }
                }
                out.collect(user.toString());
            }

        }).print();

        env.execute(FlinkUserJob.class.getName());

    }
}
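Both versions reference a UserInfo class that the post does not include. Here is a minimal sketch of what it presumably looks like, with the fields and setters inferred from the calls above (everything kept as String, matching how the tuple fields are used):

package com.flink.java.project;

import java.io.Serializable;

// Hypothetical reconstruction of the UserInfo POJO used by both jobs;
// field names are inferred from the setter calls in the window/flatMap logic.
public class UserInfo implements Serializable {

    private static final long serialVersionUID = 1L;

    private String userID; // user id
    private String name;   // user name
    private String age;    // birthday / age bucket, kept as a raw string
    private String sex;    // gender
    private String pro;    // product id
    private String money;  // order amount
    private String time;   // order time

    public void setUserID(String userID) { this.userID = userID; }
    public void setName(String name) { this.name = name; }
    public void setAge(String age) { this.age = age; }
    public void setSex(String sex) { this.sex = sex; }
    public void setPro(String pro) { this.pro = pro; }
    public void setMoney(String money) { this.money = money; }
    public void setTime(String time) { this.time = time; }

    @Override
    public String toString() {
        return "UserInfo{userID=" + userID + ", name=" + name + ", age=" + age
                + ", sex=" + sex + ", pro=" + pro + ", money=" + money
                + ", time=" + time + "}";
    }
}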

2. After refining the code, state management is added as follows: the count window and WindowFunction are replaced by a RichFlatMapFunction holding a per-key ValueState of (count, partially assembled UserInfo), and checkpointing with a filesystem state backend is enabled.

package com.flink.java.project;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.Collector;
import org.apache.kafka.clients.consumer.ConsumerConfig;


import java.util.Properties;
/**
 *  @Author BaronND
 *  @Description Consumes Kafka messages;
 *  if the data volume is large, add state management.
 *
 */
public class FlinkUserJob_v2 {
    public static void main(String[] args) throws Exception {
        // get the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // set the parallelism
        env.setParallelism(3);
        // checkpoint interval: 10 seconds
        env.enableCheckpointing(10000);
        // use exactly-once checkpointing semantics
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
//        // minimum pause between checkpoints
//        env.getCheckpointConfig().setMinPauseBetweenCheckpoints(10000);
//        // a checkpoint must complete within 10 seconds or it is discarded (checkpoint timeout)
//        env.getCheckpointConfig().setCheckpointTimeout(10000);
//        // allow only one checkpoint at a time
//        env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
//        // retain checkpoint data when the job is cancelled
//        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        // set the state backend (a local filesystem path here; point at HDFS/S3 in production)
        env.setStateBackend(new FsStateBackend("file:///D:\\tmp\\flinkStateBackend"));

        // Kafka consumer properties
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.10.15:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "flink-k2k_01");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        FlinkKafkaConsumer011<String> kafkaConsumer011 = new FlinkKafkaConsumer011<>("user_data", new SimpleStringSchema(), props);

        // start consuming from the latest offsets
        kafkaConsumer011.setStartFromLatest();
        // add the Kafka source
        DataStreamSource<String> userSource = env.addSource(kafkaConsumer011).setParallelism(1);

        // parse each comma-separated message into a Tuple5
        userSource.map(new MapFunction<String, Tuple5<String, String, String, String, String>>() {
            @Override
            public Tuple5<String,String,String,String,String> map(String str) throws Exception {
                String[] split = str.split(",");
                return new Tuple5<>(split[0],split[1],split[2],split[3],split[4]);
            }
        }).keyBy(0) // key by userId (field 0)
          .flatMap(new CountWindowPrint()) // stateful flatMap replaces the count window
          .print();

        env.execute(FlinkUserJob_v2.class.getName());

    }

}
class CountWindowPrint extends RichFlatMapFunction<Tuple5<String, String, String,String, String>, UserInfo> {

    private static final long serialVersionUID = 1808329479322205953L;
    /**
     * Per-key state: (record count, partially assembled UserInfo).
     * UserInfo fields: userId, name, sex, age bucket, product, amount, order time.
     */
    private transient ValueState<Tuple2<Integer, UserInfo>> data;

    StateTtlConfig ttlConfig =
            StateTtlConfig.newBuilder(Time.seconds(1)) // the time-to-live value
                    .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
                    // state visibility: never return an expired value on read
                    .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
                    .build();

    @Override
    public void flatMap(Tuple5<String, String, String,String, String> input, Collector<UserInfo> out) throws Exception {

        // per-key accumulator: (count, partially assembled UserInfo)
        Tuple2<Integer, UserInfo> tuple2 = data.value();

        // update the count
        int count = tuple2.f0 + 1;

        Tuple5<String, String, String, String, String> tp = input;
        final UserInfo user = tuple2.f1;
        // dispatch on the record-type tag carried in the last field
        switch (tp.f4) {
            // record format: userId, uname, birthday, sex, f1
            case "f1":
                user.setUserID(tp.f0);
                user.setName(tp.f1);
                user.setAge(tp.f2);
                user.setSex(tp.f3);
                break;
            // record format: uid, categoryId, productId, time, f2
            case "f2":
                user.setPro(tp.f2);
                break;
            // record format: userId, productId, amount, time, f3
            case "f3":
                user.setMoney(tp.f2);
                user.setTime(tp.f3);
                break;
        }

        data.update(Tuple2.of(count,user));
        // if the count reaches 3, emit the merged user and clear the state
        if (count >= 3) {
            out.collect(user);
            data.clear();
        }
    }

    @Override
    public void open(Configuration config) {
        ValueStateDescriptor<Tuple2<Integer,UserInfo>> descriptor =
                new ValueStateDescriptor<>(
                        "userInfo", // the state name
                        TypeInformation.of(new TypeHint<Tuple2<Integer,UserInfo>>() {}), // type information
                        new Tuple2<>(0, new UserInfo())); // default value of the state, if nothing was set
        // enable state time-to-live (left disabled here; see the sketch below)
        //descriptor.enableTimeToLive(ttlConfig);
        data = getRuntimeContext().getState(descriptor);
    }
}
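Note that descriptor.enableTimeToLive(ttlConfig) is left commented out above. Here is a minimal sketch of open() with TTL actually enabled, assuming an arbitrary 10-minute TTL instead of the 1-second value above. Once TTL is on, value() can return null after expiry, so flatMap should null-check rather than rely on the deprecated default-value descriptor constructor:

    @Override
    public void open(Configuration config) {
        // TTL config: entries expire 10 minutes after the last write (placeholder value)
        StateTtlConfig ttlConfig = StateTtlConfig.newBuilder(Time.minutes(10))
                .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)    // TTL timer resets on every write
                .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
                .build();

        ValueStateDescriptor<Tuple2<Integer, UserInfo>> descriptor =
                new ValueStateDescriptor<>(
                        "userInfo",
                        TypeInformation.of(new TypeHint<Tuple2<Integer, UserInfo>>() {}));
        descriptor.enableTimeToLive(ttlConfig);
        data = getRuntimeContext().getState(descriptor);
    }

    // and at the top of flatMap(), replace the plain data.value() read with:
    //     Tuple2<Integer, UserInfo> tuple2 = data.value();
    //     if (tuple2 == null) {
    //         tuple2 = Tuple2.of(0, new UserInfo()); // first record for this key, or state expired
    //     }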

 
