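All of the examples below reference an Event POJO defined earlier in the chapter. A minimal sketch of what that class needs to look like, inferred from the fields the examples use (user, url, timestamp); the public no-arg constructor and public fields satisfy Flink's POJO requirements:

package com.atguigu.chapter05;

// Sketch of the Event POJO assumed by the examples below; the real class
// is defined earlier in the chapter.
public class Event {
    public String user;     // user name
    public String url;      // visited URL
    public Long timestamp;  // event time in milliseconds

    public Event() {
    }

    public Event(String user, String url, Long timestamp) {
        this.user = user;
        this.url = url;
        this.timestamp = timestamp;
    }

    @Override
    public String toString() {
        return "Event{user='" + user + "', url='" + url + "', timestamp=" + timestamp + "}";
    }
}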
map
package com.atguigu.chapter05;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * @author psl
 * @create 2022/5/3 10:44
 * @desc Usage of the map operator
 */
public class TransformMapTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        DataStreamSource<Event> stream = env.fromElements(
                new Event("Mary", "./home", 1000L),
                new Event("Bob", "./cart", 2000L),
                new Event("Tom", "./prod", 3000L));

        // Implement the MapFunction interface with an anonymous class
        SingleOutputStreamOperator<String> map1 = stream.map(new MapFunction<Event, String>() {
            @Override
            public String map(Event event) throws Exception {
                return event.user;
            }
        });
        map1.print();

        // Lambda expression
        SingleOutputStreamOperator<String> map2 = stream.map(data -> data.user);
        map2.print();

        env.execute();
    }
}
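When the mapping logic is more than a one-liner, the MapFunction can also be factored into its own class and reused across jobs. A minimal sketch; the class name UserExtractor is an illustrative choice, not from the original code:

// A standalone MapFunction implementation (name UserExtractor is hypothetical)
public static class UserExtractor implements MapFunction<Event, String> {
    @Override
    public String map(Event event) throws Exception {
        return event.user;
    }
}

// Usage: stream.map(new UserExtractor()).print();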
flatMap
package com.atguigu.chapter05;

import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

/**
 * @author psl
 * @create 2022/5/3 11:31
 * @desc Usage of the flatMap operator
 */
public class TransformFlatMapTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        DataStreamSource<Event> stream = env.fromElements(
                new Event("Mary", "./home", 1000L),
                new Event("Bob", "./cart", 2000L),
                new Event("Tom", "./prod", 3000L));

        // One input element can produce zero, one, or many output elements:
        // Mary's events emit only the url, Bob's emit all three fields
        stream.flatMap((Event value, Collector<String> out) -> {
            if (value.user.equals("Mary")) {
                out.collect(value.url);
            } else if (value.user.equals("Bob")) {
                out.collect(value.user);
                out.collect(value.url);
                out.collect(value.timestamp.toString());
            }
        // Java erases the Collector's generic type in the lambda, so the
        // output type must be declared explicitly with returns()
        }).returns(new TypeHint<String>() {
        }).print();

        env.execute();
    }
}
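The returns() call is only needed because the lambda erases the Collector<String> element type. An anonymous FlatMapFunction keeps that type information, so the same logic can be written without returns(); a sketch (FlatMapFunction comes from org.apache.flink.api.common.functions):

// Anonymous-class variant: no returns() needed, the generic parameters
// <Event, String> carry the output type
stream.flatMap(new FlatMapFunction<Event, String>() {
    @Override
    public void flatMap(Event value, Collector<String> out) throws Exception {
        if (value.user.equals("Mary")) {
            out.collect(value.url);
        }
    }
}).print();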
filter
package com.atguigu.chapter05;

import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * @author psl
 * @create 2022/5/3 11:25
 * @desc Usage of the filter operator
 */
public class TransformFilterTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        DataStreamSource<Event> stream = env.fromElements(
                new Event("Mary", "./home", 1000L),
                new Event("Bob", "./cart", 2000L),
                new Event("Tom", "./prod", 3000L));

        // Keep only Mary's events; the string passed to print() is a label
        // prefixed to every output line
        stream.filter(data -> data.user.equals("Mary")).print("Mary click");

        env.execute();
    }
}
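As with map, the predicate can also be written as an anonymous FilterFunction instead of a lambda; a sketch (FilterFunction comes from org.apache.flink.api.common.functions):

// Anonymous-class variant of the same filter
stream.filter(new FilterFunction<Event>() {
    @Override
    public boolean filter(Event value) throws Exception {
        return value.user.equals("Mary");  // keep the element when true
    }
}).print("Mary click");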
reduce
package com.atguigu.chapter05;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * @author psl
 * @create 2022/5/3 13:46
 * @desc Usage of the reduce operator
 */
public class TransformReduceTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        DataStreamSource<Event> stream = env.fromElements(
                new Event("Mary", "./home", 1000L),
                new Event("Bob", "./cart", 3000L),
                new Event("Tom", "./home", 3000L),
                new Event("Bob", "./home", 1000L),
                new Event("Tom", "./home", 3000L),
                new Event("Bob", "./prof", 4400L),
                new Event("Tom", "./prod", 5000L));

        // Count the number of clicks per user
        SingleOutputStreamOperator<Tuple2<String, Long>> clickByUser = stream
                .map(new MapFunction<Event, Tuple2<String, Long>>() {
                    @Override
                    public Tuple2<String, Long> map(Event value) throws Exception {
                        return Tuple2.of(value.user, 1L);
                    }
                })
                .keyBy(data -> data.f0)
                .reduce(new ReduceFunction<Tuple2<String, Long>>() {
                    @Override
                    public Tuple2<String, Long> reduce(Tuple2<String, Long> value1, Tuple2<String, Long> value2) throws Exception {
                        return Tuple2.of(value1.f0, value1.f1 + value2.f1);
                    }
                });

        // Select the most active user by click count; keying every record to
        // the same constant "key" funnels all running counts into one group
        SingleOutputStreamOperator<Tuple2<String, Long>> result = clickByUser
                .keyBy(data -> "key")
                .reduce(new ReduceFunction<Tuple2<String, Long>>() {
                    @Override
                    public Tuple2<String, Long> reduce(Tuple2<String, Long> value1, Tuple2<String, Long> value2) throws Exception {
                        return value1.f1 > value2.f1 ? value1 : value2;
                    }
                });

        result.print();
        env.execute();
    }
}
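Because reduce emits a running result for every incoming record, the job prints the current most-active user after each event. With parallelism 1 the output should look something like this (note the ternary uses a strict >, so on a tie the newer record wins):

(Mary,1)
(Bob,1)
(Tom,1)
(Bob,2)
(Tom,2)
(Bob,3)
(Tom,3)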
simpleagg
package com.atguigu.chapter05;

import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * @author psl
 * @create 2022/5/3 13:13
 * @desc Usage of the simple aggregation operators (keyBy + max/maxBy)
 */
public class TransformSimpleAggTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        DataStreamSource<Event> stream = env.fromElements(
                new Event("Mary", "./home", 1000L),
                new Event("Bob", "./cart", 3000L),
                new Event("Bob", "./home", 1000L),
                new Event("Bob", "./prof", 4400L),
                new Event("Tom", "./home", 3000L),
                new Event("Tom", "./prod", 5000L));

        // maxBy returns the complete record that holds the maximum timestamp
        stream.keyBy(data -> data.user)
                .maxBy("timestamp")
                .print();

        // max only updates the timestamp field; the other fields keep the
        // values of the first record seen for that key
        stream.keyBy(data -> data.user)
                .max("timestamp")
                .print();

        env.execute();
    }
}
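The difference between the two shows up on Bob's third event. maxBy emits the whole record carrying the largest timestamp, while max patches only the aggregated field and keeps the rest from the first record of that key. Assuming the Event toString from the sketch at the top, the final Bob records from the two pipelines should differ like this:

maxBy: Event{user='Bob', url='./prof', timestamp=4400}
max:   Event{user='Bob', url='./cart', timestamp=4400}   (url stays from Bob's first event)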