UVStatMultiPlans(GitHub)项目持续收集各种高性能实时uv实现方案并对各种实现方案的优缺点进行对比分析!
需求描述
消费Kafka中的用户行为日志数据,按分钟统计每个页面的UV(独立访客数)。
Kafka数据格式
{"userId":"c61b801e-22e7-4238-8f67-90968a40f2a7","page":"page_1","behaviorTime":1692247408129}
{"userId":"c61b801e-22e7-4238-8f67-90968a40f2a7","page":"page_2","behaviorTime":1692247408129}
代码实现
文件:UVStatPlan2.java
/**
 * Flink streaming job that consumes user-behavior JSON records from Kafka and
 * reports the number of unique visitors (UV) per page for each one-minute
 * event-time window.
 *
 * <p>Records are keyed by page, windowed by event time and handed to
 * {@link BaseStateProcessWindowFunction}, which performs the de-duplication.
 * Results are printed to standard output.
 */
public class UVStatPlan2 {

    /**
     * Builds and runs the job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job cannot be built or fails while executing
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(5);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // NOTE(review): periodic watermarks are generated only every 10 minutes, so
        // event-time windows are finalized (and cleaned up) with up to that much delay.
        // Confirm this is intended; Flink's default interval is far shorter.
        env.getConfig().setAutoWatermarkInterval(TimeUnit.MINUTES.toMillis(10));

        Properties kafkaProperties = new Properties();
        kafkaProperties.setProperty("bootstrap.servers", SysConst.KAFKA_BOOTSTRAP_SERVERS);
        // A fresh consumer group per run combined with "latest" means every start
        // reads only records produced after the job came up.
        kafkaProperties.setProperty("group.id", "groupId_" + System.currentTimeMillis());
        kafkaProperties.setProperty("auto.offset.reset", "latest");
        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>(SysConst.KAFKA_TOPIC_NAME, new SimpleStringSchema(), kafkaProperties);

        DataStream<UserBehavior> dataStream = env.addSource(consumer)
                .map(x -> {
                    UserBehavior userBehavior = null;
                    try {
                        userBehavior = JsonUtil.toJavaObject(x, UserBehavior.class);
                    } catch (Exception ex) {
                        // Best effort: report the malformed record and let the filter below drop it.
                        ex.printStackTrace();
                    }
                    return userBehavior;
                })
                // Unparseable records come out of the map as null; drop them here so the
                // timestamp assigner below never dereferences null and crashes the job.
                .filter(userBehavior -> userBehavior != null)
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy.<UserBehavior>forMonotonousTimestamps()
                                .withTimestampAssigner((SerializableTimestampAssigner<UserBehavior>)
                                        (userBehavior, l) -> userBehavior.getBehaviorTime()));

        dataStream.keyBy((KeySelector<UserBehavior, String>) UserBehavior::getPage)
                // One-minute tumbling windows, matching the "UV per page per minute" requirement.
                .window(TumblingEventTimeWindows.of(Time.minutes(1)))
                // Presumably emits intermediate results every 5 seconds while the window is
                // open; TimeIntervalTrigger is defined elsewhere - verify its fire/purge behavior.
                .trigger(new TimeIntervalTrigger<>(5, TimeUnit.SECONDS))
                .process(new BaseStateProcessWindowFunction())
                .map(x -> {
                    System.out.println("key:" + x.page + ",window time:" + DateUtil.formatTimeStamp(x.windowTime, "yyyy-MM-dd HH:mm:ss") + ",uv:" + x.uv);
                    // Pass the element through rather than emitting a null record.
                    return x;
                });

        env.execute();
    }
}
文件:BaseStateProcessWindowFunction.java
/**
 * Window function that counts unique visitors (UV) for one page within one window.
 *
 * <p>The set of already-seen user ids and the running UV count are kept in
 * <em>per-window</em> keyed state ({@code context.windowState()}), so each window
 * starts from zero and the function may be invoked several times for the same
 * window (for example by an early-firing trigger) without double counting.
 * State obtained through {@code getRuntimeContext()} would instead be shared by
 * every window of the same page, making the count accumulate across windows.
 */
public class BaseStateProcessWindowFunction extends ProcessWindowFunction<UserBehavior, PageUVResult, String, TimeWindow> {

    /** Safety-net expiry for the state in case a window is never cleaned up. */
    private static final StateTtlConfig stateTtlConfig = StateTtlConfig
            .newBuilder(Time.hours(3))
            .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
            .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
            .build();

    /** Descriptor of the running UV count of the current window. */
    private transient ValueStateDescriptor<Long> uvStateDescriptor;

    /** Descriptor of the user ids already counted in the current window. */
    private transient MapStateDescriptor<String, Boolean> userMapDescriptor;

    /**
     * Creates the state descriptors. The state handles themselves are resolved in
     * {@link #process} because per-window state is only reachable through the
     * window context.
     *
     * @param parameters configuration passed in by the runtime
     * @throws Exception if the superclass initialization fails
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        uvStateDescriptor = new ValueStateDescriptor<>("value", Long.class);
        userMapDescriptor = new MapStateDescriptor<>("userIsExistMap", String.class, Boolean.class);
        uvStateDescriptor.enableTimeToLive(stateTtlConfig);
        userMapDescriptor.enableTimeToLive(stateTtlConfig);
    }

    /**
     * Adds the users in {@code iterable} that have not been seen in this window yet
     * to the window's UV count and emits the updated count.
     *
     * @param page      the key of the window, i.e. the page being counted
     * @param context   window context providing the window bounds and per-window state
     * @param iterable  the behavior records handed over by the window for this firing
     * @param collector receives one {@link PageUVResult} carrying the page, the window
     *                  end timestamp and the current UV
     * @throws Exception if state access fails
     */
    @Override
    public void process(String page, Context context, Iterable<UserBehavior> iterable, Collector<PageUVResult> collector) throws Exception {
        ValueState<Long> uvState = context.windowState().getState(uvStateDescriptor);
        MapState<String, Boolean> userMapState = context.windowState().getMapState(userMapDescriptor);

        // No stored value means this is the first firing of the window (or the state expired).
        Long storedUv = uvState.value();
        long uv = storedUv == null ? 0L : storedUv;
        for (UserBehavior userBehavior : iterable) {
            String userId = userBehavior.getUserId();
            if (!userMapState.contains(userId)) {
                userMapState.put(userId, true);
                uv++;
            }
        }
        uvState.update(uv);
        collector.collect(PageUVResult.of(page, context.window().getEnd(), uv));
    }

    /**
     * Releases the per-window state when the window is purged, so finished windows
     * do not linger in the state backend until the TTL expires.
     *
     * @param context context of the window being cleaned up
     * @throws Exception if state access fails
     */
    @Override
    public void clear(Context context) throws Exception {
        context.windowState().getState(uvStateDescriptor).clear();
        context.windowState().getMapState(userMapDescriptor).clear();
    }
}
完整代码已上传至:https://github.com/xl-xueling/uvstatmultiplans.git