笔者最近有个新需求:需要在日志后面加入用户每个页面的浏览时长。由于日志中本身只有时间这个字段,没有浏览计时,最简单粗暴的方法就是用后一条日志的时间减去前一条日志的时间;再设定一个超时阈值作为用户的超时时间,当一个用户长时间未操作时,写回一个固定时间。
第一个想到的是用window来做,但由于对flink还在摸索之中,踩坑后没有解决,于是改用ProcessFunction完成(写完ProcessFunction又回头写了一下window,也解决了。汗。。。。。)
代码如下
package operator;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import pojo.LogBean;
import java.sql.Time;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Date;
/**
* Created by IntelliJ IDEA.
* User: fr
* Time: 2020/2/28 14:26
*/
public class AddTimeProcessFunction extends KeyedProcessFunction<String,Tuple2<String, LogBean>, Tuple2<String, LogBean>> {
/**
* process function维持的状态
*/
private ValueState<CountWithTimestamp> state;
/**
* 设定的日志延迟时间
*/
private final long delay = 500;
@Override
public void open(Configuration parameters) throws Exception {
state = getRuntimeContext().getState(new ValueStateDescriptor<>("myState", CountWithTimestamp.class));
}
@Override
public void processElement(Tuple2<String, LogBean> value, Context ctx, Collector<Tuple2<String, LogBean>> out) throws Exception {
// retrieve the current count
// 获取当前key的状态
CountWithTimestamp current = state.value();
//判断状态是否存在
if (current == null || current.flag == 0) {
//将时间转换成long型
LocalDateTime parse = LocalDateTime.parse(value.f1.getOperTime(), DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
long nowEventTime = parse.toEpochSecond(ZoneOffset.of("+8"));
nowEventTime *= 1000 ;
current = new CountWithTimestamp();
current.key = value.f0;
current.logbean=value.f1;
current.flag=1;
current.lastModified = nowEventTime;
//更新状态到state中
state.update(current);
}else {
// set the state's timestamp to the record's assigned event time timestamp
LocalDateTime parse = LocalDateTime.parse(value.f1.getOperTime(), DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
long nowEventTime = parse.toEpochSecond(ZoneOffset.of("+8"));
nowEventTime *= 1000 ;
current.logbean.setGlobal1(String.valueOf(nowEventTime - current.lastModified));
out.collect(new Tuple2<>(current.key,current.logbean));
//System.err.println(current);
current.logbean=value.f1;
current.flag=1;
// schedule the next timer 60 seconds from the current event time
// 从当前事件时间开始计划下一个delay秒的定时器
ctx.timerService().registerEventTimeTimer(current.lastModified + delay);
// 将状态写回
state.update(current);
}
}
@Override
public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple2<String, LogBean>> out) throws Exception {
// get the state for the key that scheduled the timer
//获取计划定时器的key的状态
CountWithTimestamp result = state.value();
// 检查是否是过时的定时器或最新的定时器
if (timestamp >= result.lastModified + delay) {
state.value().flag=0;
result.logbean.setGlobal1(String.valueOf(delay));
System.err.println(result);
// emit the state on timeout
out.collect(new Tuple2<String, LogBean>(result.key, result.logbean));
//清除此状态
state.clear();
}
}
}
/**
 * Mutable holder for the per-key state of {@code AddTimeProcessFunction}.
 * The fields are public because the process function reads and writes them directly.
 */
class CountWithTimestamp {

    /** Key of the stream partition this state belongs to. */
    public String key;

    /** The buffered log record. */
    public LogBean logbean;

    /** Event time (epoch milliseconds) recorded by the process function. */
    public long lastModified;

    /** Set to 1 by the process function when it populates the state; 0 is treated as "no state". */
    public int flag = 0;

    /** Creates an empty holder; fields keep their default values. */
    public CountWithTimestamp() {
    }

    /**
     * Creates a fully initialised holder.
     *
     * @param key          partition key
     * @param logbean      buffered log record
     * @param lastModified event time in epoch milliseconds
     * @param flag         populated marker
     */
    public CountWithTimestamp(String key, LogBean logbean, long lastModified, int flag) {
        this.key = key;
        this.logbean = logbean;
        this.lastModified = lastModified;
        this.flag = flag;
    }

    /** @return the partition key */
    public String getKey() {
        return this.key;
    }

    /** @param key the partition key */
    public void setKey(String key) {
        this.key = key;
    }

    /** @return the buffered log record */
    public LogBean getLogbean() {
        return this.logbean;
    }

    /** @param logbean the log record to buffer */
    public void setLogbean(LogBean logbean) {
        this.logbean = logbean;
    }

    /** @return the recorded event time in epoch milliseconds */
    public long getLastModified() {
        return this.lastModified;
    }

    /** @param lastModified the event time in epoch milliseconds */
    public void setLastModified(long lastModified) {
        this.lastModified = lastModified;
    }

    /** @return the populated marker */
    public int getFlag() {
        return this.flag;
    }

    /** @param flag the populated marker */
    public void setFlag(int flag) {
        this.flag = flag;
    }

    /** Renders all four fields for debugging output. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("CountWithTimestamp{");
        text.append("key='").append(key).append('\'');
        text.append(", logbean=").append(logbean);
        text.append(", lastModified=").append(lastModified);
        text.append(", flag=").append(flag);
        text.append('}');
        return text.toString();
    }
}
值得一说的是,由于定时器是通过 ctx.timerService().registerEventTimeTimer 注册的,使用的是事件时间(eventTime),所以在主代码中要声明使用 eventTime,并为数据流分配时间戳和 watermark:
// Declare that the job runs on event time (required for event-time timers to fire).
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
// Extract the event time from each record and convert it to a long value.
// NOTE(review): this statement is not terminated in the snippet, so it presumably
// continues with further chained operators — confirm that the stream returned by
// assignTimestampsAndWatermarks is the one consumed downstream.
// NOTE(review): AscendingTimestampExtractor expects timestamps in ascending order —
// confirm this holds for the log source.
data.assignTimestampsAndWatermarks(new AscendingTimestampExtractor<LogBean>() {
@Override
public long extractAscendingTimestamp(LogBean element) {
// Same pattern and "+8" offset as used when AddTimeProcessFunction parses getOperTime().
LocalDateTime parse = LocalDateTime.parse(element.getOperTime(), DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
long eventTime = parse.toEpochSecond(ZoneOffset.of("+8"));
// Flink timestamps are in milliseconds, while the log time was only converted to seconds, so scale it up.
return eventTime * 1000;
}
})