1. 找出点击次数最多(即访问量最大)的最活跃用户
package com.beibeixu.chapter05;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
* @author beibeiking
* @date 2022/09/01 16:16
* 归约聚合(reduce)
**/
public class TransformReduceTest {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//从元素中读取数据
DataStreamSource<Event> stream = env.fromElements(
new Event("Mary", "./home", 1000L),
new Event("Bob", "./cart", 2000L),
new Event("Alice", "./prod?id=100", 3000L),
new Event("Bob", "./prod?id=1", 3300L),
new Event("Bob", "./home", 3300L),
new Event("Alice", "./prod?id=200", 3200L),
new Event("Bob", "./prod?id=2", 3800L),
new Event("Bob", "./prod?id=3", 4200L)
);
//找点击次数最活跃(即访问量最大)
//1.统计每个用户的访问频次
SingleOutputStreamOperator<Tuple2<String, Long>> clicksByUser = stream.map(new MapFunction<Event, Tuple2<String, Long>>() {
@Override
public Tuple2<String, Long> map(Event event) throws Exception {
return Tuple2.of(event.user, 1L);
}
}).keyBy(data -> data.f0)
.reduce(new ReduceFunction<Tuple2<String, Long>>() {
@Override
public Tuple2<String, Long> reduce(Tuple2<String, Long> stringLongTuple2, Tuple2<String, Long> t1) throws Exception {
return Tuple2.of(stringLongTuple2.f0, stringLongTuple2.f1 + t1.f1);
}
});
clicksByUser.print("clicksByUser:");
//2.选取当前最活跃的用户,提取第二个字段最大值
SingleOutputStreamOperator<Tuple2<String, Long>> result = clicksByUser.keyBy(data -> "key")
.reduce(new ReduceFunction<Tuple2<String, Long>>() {
@Override
public Tuple2<String, Long> reduce(Tuple2<String, Long> stringLongTuple2, Tuple2<String, Long> t1) throws Exception {
return stringLongTuple2.f1 > t1.f1 ? stringLongTuple2 : t1;
//如果stringLongTuple2.f1大于t1.f1,则返回stringLongTuple2,否则返回t1
}
});
result.print("result:");
env.execute();
}
}
输出:
clicksByUser:> (Mary,1)
clicksByUser:> (Bob,1)
clicksByUser:> (Alice,1)
clicksByUser:> (Bob,2)
clicksByUser:> (Bob,3)
clicksByUser:> (Alice,2)
clicksByUser:> (Bob,4)
clicksByUser:> (Bob,5)
result:> (Mary,1)
result:> (Bob,1)
result:> (Alice,1)
result:> (Bob,2)
result:> (Bob,3)
result:> (Bob,3)
result:> (Bob,4)
result:> (Bob,5)