Flink ProcessFunction 的使用以及踩到的一些坑

最新推荐文章于 2020-11-25 21:54:42 发布

cool0725

最新推荐文章于 2020-11-25 21:54:42 发布

阅读量255

点赞数

分类专栏：云计算大数据 flink

本文链接：https://blog.csdn.net/cool0725/article/details/104613343

版权

云计算大数据同时被 2 个专栏收录

4 篇文章 0 订阅

订阅专栏

flink

1 篇文章 0 订阅

订阅专栏

笔者最近新需求需要在日志后面加入用户每个页面浏览的时间，由于日志中本身只有时间这个字段，没有浏览计时，最简单粗暴的方法就是后一条日志的时间减去前一条的时间，然后再设定一个超时阈值作为用户的超时时间，当一个用户长时间未操作时写回一个固定时间
首先想到的是用 window 来做，但由于笔者对 Flink 还在摸索之中，踩坑后没能解决，于是改用 ProcessFunction 完成（写完 ProcessFunction 之后又回头用 window 实现了一遍，也解决了，汗……）。
代码如下

package operator;

import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import pojo.LogBean;

import java.sql.Time;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Date;

/**
 * Created by IntelliJ IDEA.
 * User: fr
 * Time: 2020/2/28 14:26
 */

/**
 * Keyed process function that annotates each log record with the time the user
 * spent on it, i.e. the gap between this record and the next record of the same key.
 *
 * <p>For every incoming record the previously buffered record of the key is emitted
 * with {@code global1} set to the difference (in milliseconds) between the two
 * records' {@code operTime}. The incoming record is then buffered in keyed state.
 * If no further record arrives before the event-time timer fires, the buffered
 * record is emitted with {@code global1} set to the fixed {@link #delay} and the
 * state is cleared.
 *
 * <p>Timers are event-time timers, so the job must assign timestamps and watermarks
 * upstream for the timeout path to ever fire.
 */
public class AddTimeProcessFunction extends KeyedProcessFunction<String,Tuple2<String, LogBean>, Tuple2<String, LogBean>> {

    /**
     * Format of {@code LogBean.getOperTime()}. Kept static because the formatter is
     * thread-safe and reusable, and because a static field is not serialized with
     * the function instance.
     */
    private static final DateTimeFormatter OPER_TIME_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /** Zone offset the log timestamps are written in. */
    private static final ZoneOffset OPER_TIME_OFFSET = ZoneOffset.of("+8");

    /**
     * Per-key state: the last record seen and its event time.
     */
    private ValueState<CountWithTimestamp> state;

    /**
     * Inactivity timeout in milliseconds; also the value written to {@code global1}
     * when a record is flushed by the timeout.
     */
    private final long delay = 500;

    /**
     * Obtains the keyed state handle from the runtime context.
     *
     * @param parameters configuration passed by the runtime (unused)
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        state = getRuntimeContext().getState(new ValueStateDescriptor<>("myState", CountWithTimestamp.class));
    }

    /**
     * Emits the previously buffered record (if any) with its dwell time, buffers the
     * incoming record, and schedules the inactivity timer for it.
     *
     * @param value (key, log record) pair; {@code value.f1.getOperTime()} must match
     *              {@code yyyy-MM-dd HH:mm:ss}
     * @param ctx   context used to register the event-time timer
     * @param out   collector receiving the annotated previous record
     * @throws java.time.format.DateTimeParseException if the operTime cannot be parsed
     */
    @Override
    public void processElement(Tuple2<String, LogBean> value, Context ctx, Collector<Tuple2<String, LogBean>> out) throws Exception {
        long nowEventTime = parseEventTimeMillis(value.f1.getOperTime());

        CountWithTimestamp current = state.value();
        if (current == null || current.flag == 0) {
            // First record of a session for this key: nothing to emit yet.
            current = new CountWithTimestamp();
            current.key = value.f0;
        } else {
            // The previous record's dwell time is the gap up to the current record.
            current.logbean.setGlobal1(String.valueOf(nowEventTime - current.lastModified));
            out.collect(new Tuple2<>(current.key, current.logbean));
        }

        // Buffer the incoming record. lastModified must advance on every record,
        // otherwise later gaps would be measured from the first record of the session.
        current.logbean = value.f1;
        current.flag = 1;
        current.lastModified = nowEventTime;
        state.update(current);

        // Schedule the timeout relative to this record, including the first one of a
        // session, so a key that goes silent after a single record is still flushed.
        ctx.timerService().registerEventTimeTimer(nowEventTime + delay);
    }

    /**
     * Flushes the buffered record with the fixed timeout value when no newer record
     * has arrived for the key, then clears the state.
     *
     * @param timestamp the timestamp the firing timer was registered for
     * @param ctx       timer context (unused)
     * @param out       collector receiving the timed-out record
     */
    @Override
    public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple2<String, LogBean>> out) throws Exception {
        CountWithTimestamp result = state.value();

        // A timer registered by an earlier record may fire after the state has
        // already been flushed and cleared; there is nothing left to emit then.
        if (result == null) {
            return;
        }

        // Timers of older records carry a smaller timestamp and are skipped here;
        // only the timer belonging to the latest record flushes the state.
        if (timestamp >= result.lastModified + delay) {
            result.flag = 0;
            result.logbean.setGlobal1(String.valueOf(delay));

            // Diagnostic output retained from the original implementation.
            System.err.println(result);
            out.collect(new Tuple2<String, LogBean>(result.key, result.logbean));
            state.clear();
        }
    }

    /**
     * Converts an {@code operTime} string into epoch milliseconds.
     * The log only has second resolution, so the epoch seconds are scaled by 1000.
     */
    private static long parseEventTimeMillis(String operTime) {
        LocalDateTime parsed = LocalDateTime.parse(operTime, OPER_TIME_FORMAT);
        return parsed.toEpochSecond(OPER_TIME_OFFSET) * 1000;
    }

}
/**
 * State holder kept per key by the process function: the buffered log record,
 * the event time it was seen at, and a flag marking whether the entry is active.
 * Fields are public and also exposed through bean-style accessors.
 */
class CountWithTimestamp {

    /** Key the entry belongs to. */
    public String key;
    /** The buffered log record. */
    public LogBean logbean;
    /** Event time (epoch value) of the buffered record. */
    public long lastModified;
    /** Activity marker; starts at 0. */
    public int flag;

    /** Creates an empty entry with all fields at their defaults. */
    public CountWithTimestamp() {
    }

    /** Creates a fully populated entry. */
    public CountWithTimestamp(String key, LogBean logbean, long lastModified, int flag) {
        this.key = key;
        this.logbean = logbean;
        this.lastModified = lastModified;
        this.flag = flag;
    }

    public String getKey() {
        return this.key;
    }

    public void setKey(String key) {
        this.key = key;
    }

    public LogBean getLogbean() {
        return this.logbean;
    }

    public void setLogbean(LogBean logbean) {
        this.logbean = logbean;
    }

    public long getLastModified() {
        return this.lastModified;
    }

    public void setLastModified(long lastModified) {
        this.lastModified = lastModified;
    }

    public int getFlag() {
        return this.flag;
    }

    public void setFlag(int flag) {
        this.flag = flag;
    }

    /** Renders all four fields in a single-line, brace-delimited form. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("CountWithTimestamp{");
        text.append("key='").append(key).append('\'');
        text.append(", logbean=").append(logbean);
        text.append(", lastModified=").append(lastModified);
        text.append(", flag=").append(flag);
        text.append('}');
        return text.toString();
    }
}

值得一说的是
由于定时器是通过 ctx.timerService().registerEventTimeTimer 注册的，基于事件时间（event time），所以主程序中必须声明使用事件时间，并为数据流指定时间戳和水位线，代码如下：

// Fragment from the job's main code (env and data are defined there).
// Declare that the job uses event time.
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
// Derive each record's event time from its operTime field and return it as a long.
// NOTE(review): the call's return value is not used here and the statement has no
// terminating semicolon in this fragment — confirm how the main code consumes the result.
data.assignTimestampsAndWatermarks(new AscendingTimestampExtractor<LogBean>() {
            @Override
            public long extractAscendingTimestamp(LogBean element) {
                LocalDateTime parse = LocalDateTime.parse(element.getOperTime(), DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
                long eventTime = parse.toEpochSecond(ZoneOffset.of("+8"));
                // Flink timestamps are in milliseconds while the log time is only converted to seconds, so scale by 1000.
                return eventTime * 1000;
            }
        })

cool0725

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
Flink ProcessFunction 的使用以及踩到的一些坑

笔者最近新需求需要在日志后面加入用户每个页面浏览的时间，由于日志中本身只有时间这个字段，没有浏览计时，最简单粗暴的方法就是后一条日志的时间减去前一条的时间，然后再设定一个超时阈值作为用户的超时时间，当一个用户长时间未操作时写回一个固定时间第一个想到的是用window来做，由于flink也是在摸索之中，踩坑未果，然后使用ProcessionFunction完成（写完ProcessFunction又...
复制链接

扫一扫