Flink-1.11.2-code_demo
一.source
1.1.FromCollection
package com.flink.source;
import com.flink.bean.WaterSensor;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.util.ArrayList;
/**
 * Demo: build a bounded stream from an in-memory Java collection
 * ({@code env.fromCollection}) and print every element.
 */
public class FromCollection {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Fixed demo records that the collection source will emit.
        ArrayList<WaterSensor> sensors = new ArrayList<>();
        sensors.add(new WaterSensor("001", 123456L, 100));
        sensors.add(new WaterSensor("002", 123457L, 200));
        sensors.add(new WaterSensor("003", 123458L, 300));
        sensors.add(new WaterSensor("004", 123459L, 400));
        sensors.add(new WaterSensor("005", 123460L, 500));

        DataStreamSource<WaterSensor> sensorStream = env.fromCollection(sensors);
        sensorStream.print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
1.2.FromFile
package com.flink.source;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Demo: read a text file as a streaming source.
 * The local-file variant is kept as a comment; the active source reads from HDFS.
 */
public class FromFile {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Local-file variant:
        // DataStreamSource<String> ds = env.readTextFile("D:\\code\\flink111\\src\\main\\resources\\input\\water_sensor.txt");

        // HDFS variant (active).
        DataStreamSource<String> hdfsLines = env.readTextFile("hdfs://hadoop162:8020/data/water_sensor.txt");
        hdfsLines.print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
1.3.FromKafka
package com.flink.source;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import java.util.Properties;
/**
 * Demo: consume a Kafka topic as a Flink source.
 *
 * Fixes over the original:
 * - Removed the unused {@code KafkaProducer} that was created but never used
 *   and never closed (resource leak).
 * - Removed producer-only settings (acks, retries, linger.ms, batch.size,
 *   buffer.memory, key/value serializers) that do not apply to a consumer;
 *   the original comments for them were also scrambled.
 * - Added {@code group.id}, which the Kafka consumer needs for offset commits.
 */
public class FromKafka {
    public static void main(String[] args) {
        // Consumer-side configuration only.
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.1.162:9092"); // Kafka broker list
        props.put("group.id", "flink-from-kafka-demo");       // consumer group for offset commits
        props.put("auto.offset.reset", "earliest");           // earliest | latest

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // SimpleStringSchema deserializes each record value as a UTF-8 string.
        DataStreamSource<String> dss = env.addSource(new FlinkKafkaConsumer011<String>(
                "water_sensor",
                new SimpleStringSchema(),
                props
        ));
        dss.print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
1.4.1.CustomizeSource
package com.flink.source;
import com.flink.bean.WaterSensor;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import java.util.Random;
/**
 * Custom source that emits a random WaterSensor reading once per second
 * until {@link #cancel()} is called.
 *
 * Fix: {@code flag} must be {@code volatile} — {@code cancel()} is invoked
 * from a different thread than {@code run()}, and without volatile the
 * running loop may never observe the write and never stop.
 */
public class CustomizeSource implements SourceFunction<WaterSensor> {
    // volatile so the cancel() write is visible to the run() loop thread.
    private volatile boolean flag = true;

    @Override
    public void run(SourceContext<WaterSensor> ctx) throws Exception {
        Random random = new Random();
        while (flag) {
            // ids sensor_1..sensor_3, current time, vc in [40, 49]
            ctx.collect(
                    new WaterSensor(
                            "sensor_" + (random.nextInt(3) + 1),
                            System.currentTimeMillis(),
                            random.nextInt(10) + 40
                    )
            );
            Thread.sleep(1000L);
        }
    }

    @Override
    public void cancel() {
        flag = false;
    }
}
1.4.2.FromCustomizeSource
package com.flink.source;
import com.flink.bean.WaterSensor;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Demo: wire the user-defined {@link CustomizeSource} into a job and print it.
 */
public class FromCustomizeSource {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<WaterSensor> sensorStream = env.addSource(new CustomizeSource());
        sensorStream.print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
二.transform
2.1.T_Map_FlatMap
package com.flink.transform;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.util.Collector;
/**
 * Demo: map over each input line, splitting it into words and emitting the
 * words joined by newlines (so print shows one word per line).
 *
 * Fixes over the original: string accumulation via {@code out += ...} in a
 * loop replaced with {@link StringBuilder} (same output, no quadratic
 * copying); dead commented-out flatMap variant removed.
 */
public class T_Map_FlatMap {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        DataStreamSource<String> ds = env.readTextFile("D:\\code\\flink111\\src\\main\\resources\\input\\word_count.txt");

        ds.map(in -> {
            // Each word from the line followed by a newline, concatenated.
            StringBuilder out = new StringBuilder();
            for (String word : in.split(" ")) {
                out.append(word).append("\n");
            }
            return out.toString();
        })
        .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
2.2.T_Filter
package com.flink.transform;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
 * Demo: flatMap lines into words, then keep only the words containing "a".
 */
public class T_Filter {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        DataStreamSource<String> ds = env.readTextFile("D:\\code\\flink111\\src\\main\\resources\\input\\word_count.txt");

        ds.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String in, Collector<String> out) throws Exception {
                // Emit each whitespace-separated token individually.
                for (String token : in.split(" ")) {
                    out.collect(token);
                }
            }
        })
        .filter(word -> word.contains("a"))
        .print();

        try {
            env.execute("filter");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
2.3.T_KeyBy
package com.flink.transform;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
 * Demo: classic streaming word count — flatMap to (word, 1), key by word,
 * reduce to a running sum.
 *
 * Fixes over the original: raw {@code new Tuple2(s,1)} replaced with the
 * typed {@code Tuple2.of}; deprecated {@code new Integer(...)} boxing removed;
 * deprecated positional {@code keyBy(0)} replaced with a key selector.
 */
public class T_KeyBy {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> ds = env.readTextFile("D:\\code\\flink111\\src\\main\\resources\\input\\word_count.txt");

        ds.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String in, Collector<Tuple2<String, Integer>> out) throws Exception {
                // One (word, 1) pair per token.
                for (String word : in.split(" ")) {
                    out.collect(Tuple2.of(word, 1));
                }
            }
        })
        .keyBy(pair -> pair.f0)
        .reduce(new ReduceFunction<Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> reduce(Tuple2<String, Integer> now, Tuple2<String, Integer> next) throws Exception {
                // Running per-word count.
                return Tuple2.of(now.f0, now.f1 + next.f1);
            }
        })
        .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
2.4.T_Shuffle
package com.flink.transform;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
 * Demo: redistribute records randomly across the 4 parallel subtasks with
 * {@code shuffle()} before printing.
 *
 * Fix over the original: raw {@code new Tuple2(s,1)} replaced with the typed
 * {@code Tuple2.of} factory (removes the unchecked-raw-type warning).
 */
public class T_Shuffle {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);
        DataStreamSource<String> ds = env.readTextFile("D:\\code\\flink111\\src\\main\\resources\\input\\word_count.txt");

        ds.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String in, Collector<Tuple2<String, Integer>> out) throws Exception {
                // One (word, 1) pair per token.
                for (String word : in.split(" ")) {
                    out.collect(Tuple2.of(word, 1));
                }
            }
        })
        .shuffle() // random partitioning across subtasks
        .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
2.5.T_Split_Select
package com.flink.transform;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
import java.util.Arrays;
/**
 * Demo: word-count, then split the stream into named sub-streams with the
 * split/select API and print only the "Contains_b" stream.
 *
 * Fixes over the original: raw {@code new Tuple2(s,1)} → {@code Tuple2.of};
 * deprecated {@code new Integer(...)} removed; deprecated {@code keyBy(0)}
 * replaced with a key selector.
 *
 * NOTE: split/select is itself deprecated in Flink 1.11 — side outputs
 * (OutputTag + process function) are the recommended replacement.
 */
public class T_Split_Select {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        DataStreamSource<String> ds = env.readTextFile("D:\\code\\flink111\\src\\main\\resources\\input\\word_count.txt");

        ds.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String in, Collector<Tuple2<String, Integer>> out) throws Exception {
                // One (word, 1) pair per token.
                for (String word : in.split(" ")) {
                    out.collect(Tuple2.of(word, 1));
                }
            }
        })
        .keyBy(pair -> pair.f0)
        .reduce(new ReduceFunction<Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> reduce(Tuple2<String, Integer> now, Tuple2<String, Integer> next) throws Exception {
                // Running per-word count.
                return Tuple2.of(now.f0, now.f1 + next.f1);
            }
        })
        // Tag each record with the name of the sub-stream it belongs to.
        .split(new OutputSelector<Tuple2<String, Integer>>() {
            @Override
            public Iterable<String> select(Tuple2<String, Integer> in) {
                if (in.f0.contains("a")) {
                    return Arrays.asList("Contains_a");
                } else if (in.f0.contains("b")) {
                    return Arrays.asList("Contains_b");
                } else if (in.f0.contains("c")) {
                    return Arrays.asList("Contains_c");
                } else {
                    return Arrays.asList("other");
                }
            }
        })
        .select("Contains_b")
        .print("Contains_b");

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
2.6.T_Connect_Union
package com.flink.transform;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.ConnectedStreams;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SplitStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
import java.util.Arrays;
/**
 * Demo: split a word-count stream into named sub-streams, then show the two
 * ways of combining streams: {@code connect} (two streams, types may differ)
 * and {@code union} (same type, any number of streams).
 *
 * Fixes over the original: raw {@code new Tuple2(s,1)} → {@code Tuple2.of};
 * deprecated {@code new Integer(...)} removed; deprecated {@code keyBy(0)}
 * replaced with a key selector.
 */
public class T_Connect_Union {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        DataStreamSource<String> ds = env.readTextFile("D:\\code\\flink111\\src\\main\\resources\\input\\word_count.txt");

        SplitStream<Tuple2<String, Integer>> streamSplits = ds.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String in, Collector<Tuple2<String, Integer>> out) throws Exception {
                // One (word, 1) pair per token.
                for (String word : in.split(" ")) {
                    out.collect(Tuple2.of(word, 1));
                }
            }
        })
        .keyBy(pair -> pair.f0)
        .reduce(new ReduceFunction<Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> reduce(Tuple2<String, Integer> now, Tuple2<String, Integer> next) throws Exception {
                // Running per-word count.
                return Tuple2.of(now.f0, now.f1 + next.f1);
            }
        })
        .split(new OutputSelector<Tuple2<String, Integer>>() {
            @Override
            public Iterable<String> select(Tuple2<String, Integer> in) {
                if (in.f0.contains("a")) {
                    return Arrays.asList("Contains_a");
                } else if (in.f0.contains("b")) {
                    return Arrays.asList("Contains_b");
                } else if (in.f0.contains("c")) {
                    return Arrays.asList("Contains_c");
                } else {
                    return Arrays.asList("other");
                }
            }
        });

        DataStream<Tuple2<String, Integer>> contains_a = streamSplits.select("Contains_a");
        DataStream<Tuple2<String, Integer>> contains_b = streamSplits.select("Contains_b");
        DataStream<Tuple2<String, Integer>> contains_c = streamSplits.select("Contains_c");
        DataStream<Tuple2<String, Integer>> other = streamSplits.select("other");

        // connect: pairs two streams; follow with a CoProcess/CoMap to use it.
        ConnectedStreams<Tuple2<String, Integer>, Tuple2<String, Integer>> a_connect_b = contains_a.connect(contains_b);
        //a_connect_b.process ...

        // union: merges streams of the same type into one DataStream.
        DataStream<Tuple2<String, Integer>> a_union_c = contains_a.union(contains_c);
        // a_union_c.print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
三.window
3.1.time_window
3.1.1.TumblingWindow
package com.flink.window.time_window;
import com.flink.bean.WaterSensor;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
/**
 * Demo: 5-second processing-time tumbling window over a socket stream of
 * "id,ts,vc" lines, reducing per key by summing ts and vc.
 */
public class TumblingWindow {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Source: one CSV record per socket line.
        DataStreamSource<String> socketLines = env.socketTextStream("hadoop162", 9999);

        SingleOutputStreamOperator<WaterSensor> sensors = socketLines.map(line -> {
            String[] fields = line.split(",");
            return new WaterSensor(fields[0], Long.valueOf(fields[1]), Integer.valueOf(fields[2]));
        });

        sensors
                .keyBy(WaterSensor::getId)
                .timeWindow(Time.seconds(5))
                // Per window: keep the key's id, sum ts and vc across records.
                .reduce((now, next) -> new WaterSensor(now.getId(), next.getTs() + now.getTs(), next.getVc() + now.getVc()))
                .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
3.1.2.SlidingWindow
package com.flink.window.time_window;
import com.flink.bean.WaterSensor;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
/**
 * Demo: sliding processing-time window (size 5s, slide 3s) over a socket
 * stream of "id,ts,vc" lines, reducing per key by summing ts and vc.
 *
 * Fix over the original: {@code env.execute()} was invoked twice via a
 * copy-pasted try/catch; the duplicate submission is removed.
 */
public class SlidingWindow {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Source: one CSV record per socket line.
        DataStreamSource<String> dsSocket = env.socketTextStream("hadoop162", 9999);

        SingleOutputStreamOperator<WaterSensor> dsWaterSensor = dsSocket.map(line -> {
            String[] lineValue = line.split(",");
            return new WaterSensor(lineValue[0], Long.valueOf(lineValue[1]), Integer.valueOf(lineValue[2]));
        });

        dsWaterSensor
                .keyBy(value -> value.getId())
                // window size 5s, sliding step 3s
                .timeWindow(Time.seconds(5), Time.seconds(3))
                .reduce((now, next) -> new WaterSensor(now.getId(), next.getTs() + now.getTs(), next.getVc() + now.getVc()))
                .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
3.1.3.SessionWindow
package com.flink.window.time_window;
import com.flink.bean.WaterSensor;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.ProcessingTimeSessionWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
/**
 * Demo: processing-time session window with a 3-second inactivity gap over a
 * socket stream of "id,ts,vc" lines, reducing per key by summing ts and vc.
 */
public class SessionWindow {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Source: one CSV record per socket line.
        DataStreamSource<String> socketLines = env.socketTextStream("hadoop162", 9999);

        SingleOutputStreamOperator<WaterSensor> sensors = socketLines.map(line -> {
            String[] fields = line.split(",");
            return new WaterSensor(fields[0], Long.valueOf(fields[1]), Integer.valueOf(fields[2]));
        });

        sensors
                .keyBy(WaterSensor::getId)
                // A window closes after 3s without new records for the key.
                .window(ProcessingTimeSessionWindows.withGap(Time.seconds(3)))
                .reduce((now, next) -> new WaterSensor(now.getId(), next.getTs() + now.getTs(), next.getVc() + now.getVc()))
                .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
3.2.count_window
3.2.1.TumblingWindow
package com.flink.window.count_window;
import com.flink.bean.WaterSensor;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
/**
 * Demo: tumbling COUNT window — fires once a key has accumulated 3 records —
 * over a socket stream of "id,ts,vc" lines, reducing by summing ts and vc.
 */
public class TumblingWindow {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Source: one CSV record per socket line.
        DataStreamSource<String> socketLines = env.socketTextStream("hadoop162", 9999);

        SingleOutputStreamOperator<WaterSensor> sensors = socketLines.map(line -> {
            String[] fields = line.split(",");
            return new WaterSensor(fields[0], Long.valueOf(fields[1]), Integer.valueOf(fields[2]));
        });

        sensors
                .keyBy(WaterSensor::getId)
                // Fires every 3 elements per key.
                .countWindow(3)
                .reduce((now, next) -> new WaterSensor(now.getId(), next.getTs() + now.getTs(), next.getVc() + now.getVc()))
                .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
3.2.2.SlidingWindow
package com.flink.window.count_window;
import com.flink.bean.WaterSensor;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
/**
 * Demo: sliding COUNT window (size 3, slide 2) over a socket stream of
 * "id,ts,vc" lines, reducing per key by summing ts and vc.
 *
 * Fix over the original: {@code env.execute()} was invoked twice via a
 * copy-pasted try/catch; the duplicate submission is removed.
 */
public class SlidingWindow {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Source: one CSV record per socket line.
        DataStreamSource<String> dsSocket = env.socketTextStream("hadoop162", 9999);

        SingleOutputStreamOperator<WaterSensor> dsWaterSensor = dsSocket.map(line -> {
            String[] lineValue = line.split(",");
            return new WaterSensor(lineValue[0], Long.valueOf(lineValue[1]), Integer.valueOf(lineValue[2]));
        });

        dsWaterSensor
                .keyBy(value -> value.getId())
                // Fires every 2 elements per key, computing over the last 3
                // (the very first firing happens after only 2 elements).
                .countWindow(3, 2)
                .reduce((now, next) -> new WaterSensor(now.getId(), next.getTs() + now.getTs(), next.getVc() + now.getVc()))
                .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
3.3.window_api
//其他窗口API
//trigger() —— 触发器:定义 window 什么时候关闭,触发计算并输出结果
//evictor() —— 移除器: 定义移除某些数据的逻辑
//allowedLateness() —— 允许处理迟到的数据
//sideOutputLateData() —— 将迟到的数据放入侧输出流
//getSideOutput() —— 获取侧输出流
3.3.1.incremental_aggregation_functions
//增量聚合函数
IAF_AggregateFunction
package com.flink.window.window_api.incremental_aggregation_functions;
import com.flink.bean.WaterSensor;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.expressions.In;
/**
 * Demo: incremental aggregation with {@link AggregateFunction} — counts the
 * records per key inside a 10-second tumbling processing-time window.
 * Each element is folded into the accumulator as it arrives (incremental),
 * rather than buffered until the window fires.
 */
public class IAF_AggregateFunction {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Source: one CSV record per socket line.
        DataStreamSource<String> socketLines = env.socketTextStream("hadoop162", 9999);

        SingleOutputStreamOperator<WaterSensor> sensors = socketLines.map(line -> {
            String[] fields = line.split(",");
            return new WaterSensor(fields[0], Long.valueOf(fields[1]), Integer.valueOf(fields[2]));
        });

        sensors
                .keyBy(WaterSensor::getId)
                .timeWindow(Time.seconds(10))
                .aggregate(new AggregateFunction<WaterSensor, Integer, Integer>() {
                    // Initial accumulator value for a new window.
                    @Override
                    public Integer createAccumulator() {
                        System.out.println("init...");
                        return 0;
                    }

                    // Fold one element into the accumulator (count + 1).
                    @Override
                    public Integer add(WaterSensor value, Integer accumulator) {
                        System.out.println("add...");
                        return accumulator + 1;
                    }

                    // Produce the window result from the accumulator.
                    @Override
                    public Integer getResult(Integer accumulator) {
                        System.out.println("result...");
                        return accumulator;
                    }

                    // Only invoked for session windows: merges two accumulators.
                    @Override
                    public Integer merge(Integer acc1, Integer acc2) {
                        return acc1 + acc2;
                    }
                }).print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
IAF_ReduceFunction
package com.flink.window.window_api.incremental_aggregation_functions;
import com.flink.bean.WaterSensor;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
/**
 * Demo: incremental aggregation with a reduce — 5-second tumbling
 * processing-time window, summing ts and vc per key as records arrive.
 */
public class IAF_ReduceFunction {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Source: one CSV record per socket line.
        DataStreamSource<String> socketLines = env.socketTextStream("hadoop162", 9999);

        SingleOutputStreamOperator<WaterSensor> sensors = socketLines.map(line -> {
            String[] fields = line.split(",");
            return new WaterSensor(fields[0], Long.valueOf(fields[1]), Integer.valueOf(fields[2]));
        });

        sensors
                .keyBy(WaterSensor::getId)
                .timeWindow(Time.seconds(5))
                .reduce((now, next) -> new WaterSensor(now.getId(), next.getTs() + now.getTs(), next.getVc() + now.getVc()))
                .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
3.3.2.full_window_functions
//全窗口函数
FWF_ProcessWindowFunction
package com.flink.window.window_api.full_window_functions;
import com.flink.bean.WaterSensor;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
/**
 * Demo: full-window function — unlike incremental aggregation, the elements
 * are buffered and only processed together when the window fires. Here the
 * process function emits the element count of each 10-second window.
 */
public class FWF_ProcessWindowFunction {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Source: one CSV record per socket line.
        DataStreamSource<String> socketLines = env.socketTextStream("hadoop162", 9999);

        SingleOutputStreamOperator<WaterSensor> sensors = socketLines.map(line -> {
            String[] fields = line.split(",");
            return new WaterSensor(fields[0], Long.valueOf(fields[1]), Integer.valueOf(fields[2]));
        });

        sensors
                .keyBy(WaterSensor::getId)
                .timeWindow(Time.seconds(10))
                // Buffers elements; invoked once per key when the window fires.
                .process(new ProcessWindowFunction<WaterSensor, Long, String, TimeWindow>() {
                    @Override
                    public void process(String key, Context ctx, Iterable<WaterSensor> elements, Collector<Long> out) throws Exception {
                        System.out.println("process");
                        // Number of buffered elements in this window.
                        long count = elements.spliterator().estimateSize();
                        out.collect(count);
                    }
                }).print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
四.wartermark
4.1.AccendWatermark
package com.flink.wartermark;
import com.flink.bean.WaterSensor;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AscendingTimestampExtractor;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
//升序
/**
 * Demo: event-time windows with an ascending-timestamp watermark.
 * Input lines are "id,ts,vc"; ts (seconds) is promoted to milliseconds and
 * used as the event timestamp. Each fired 3-second window reports its key,
 * bounds, current watermark and element count.
 */
public class AccendWatermark {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.setParallelism(1);

        DataStreamSource<String> socketLines = env.socketTextStream("localhost", 9999);

        SingleOutputStreamOperator<WaterSensor> sensors = socketLines.map(line -> {
            String[] fields = line.split(",");
            return new WaterSensor(fields[0], Long.valueOf(fields[1]), Integer.valueOf(fields[2]));
        });

        // Ascending extractor: assumes timestamps are monotonically increasing,
        // so the watermark trails the latest timestamp by 1 ms.
        SingleOutputStreamOperator<WaterSensor> withTimestamps = sensors.assignTimestampsAndWatermarks(
                new AscendingTimestampExtractor<WaterSensor>() {
                    @Override
                    public long extractAscendingTimestamp(WaterSensor waterSensor) {
                        // seconds -> milliseconds
                        return waterSensor.getTs() * 1000L;
                    }
                });

        withTimestamps
                .keyBy(WaterSensor::getId)
                .timeWindow(Time.seconds(3))
                .process(new ProcessWindowFunction<WaterSensor, String, String, TimeWindow>() {
                    @Override
                    public void process(String key, Context ctx, Iterable<WaterSensor> iterable, Collector<String> out) throws Exception {
                        // Diagnostic line: key, window bounds, watermark, element count.
                        out.collect("key=" + key +
                                "\n" + "window=" + ctx.window().getStart() + "--" + ctx.window().getEnd()
                                + "\n" + "watermark=" + ctx.currentWatermark()
                                + "\n" + "size=" + iterable.spliterator().estimateSize());
                    }
                })
                .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
4.2.SequenceWatermark
package com.flink.wartermark;
import com.flink.bean.WaterSensor;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
//乱序
// Demo stub for out-of-order (乱序) event-time watermarks.
// NOTE(review): this class is incomplete — it parses the socket stream into
// WaterSensor records but never assigns a watermark strategy (e.g. a
// BoundedOutOfOrderness extractor), attaches no window, and has no sink, so
// executing it produces no output. Presumably the out-of-order watermark
// assignment was meant to follow the map — TODO complete or remove.
public class SequenceWatermark {
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// Use event time so window firing would be driven by watermarks.
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
env.setParallelism(1);
// Source: one "id,ts,vc" CSV record per socket line.
DataStreamSource<String> dsSocket = env.socketTextStream("hadoop162",9999);
// Parsed stream; currently unused (no downstream operator or sink).
SingleOutputStreamOperator<WaterSensor> dsWaterSensor = dsSocket.map(line -> {
String[] lineValue = line.split(",");
return new WaterSensor(lineValue[0], Long.valueOf(lineValue[1]), Integer.valueOf(lineValue[2]));
});
try {
env.execute();
} catch (Exception e) {
e.printStackTrace();
}
}
}
五.operator
5.1.O_RollingAggregation_Reduce
package com.flink.operator;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
 * Demo: rolling aggregation on a keyed stream — a reduce that keeps a running
 * per-word count (equivalent to {@code .sum(1)}, kept as a comment).
 *
 * Fixes over the original: raw {@code new Tuple2(s,1)} → {@code Tuple2.of};
 * deprecated {@code new Integer(...)} removed; deprecated {@code keyBy(0)}
 * replaced with a key selector.
 */
public class O_RollingAggregation_Reduce {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> ds = env.readTextFile("D:\\code\\flink111\\src\\main\\resources\\input\\word_count.txt");

        ds.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String in, Collector<Tuple2<String, Integer>> out) throws Exception {
                // One (word, 1) pair per token.
                for (String word : in.split(" ")) {
                    out.collect(Tuple2.of(word, 1));
                }
            }
        })
        .keyBy(pair -> pair.f0)
        //.sum(1)
        .reduce(new ReduceFunction<Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> reduce(Tuple2<String, Integer> now, Tuple2<String, Integer> next) throws Exception {
                // Running per-word count.
                return Tuple2.of(now.f0, now.f1 + next.f1);
            }
        })
        .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
5.2.O_Process
package com.flink.operator;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;
/**
 * Demo: {@link KeyedProcessFunction} on a keyed word stream — emits
 * "Key: ... , Value: ..." for every element.
 *
 * Fixes over the original: raw {@code new Tuple2(s, 1)} replaced with the
 * typed {@code Tuple2.of}; dead commented-out reduce variant removed.
 */
public class O_Process {
    /** Formats each keyed element as "Key: <key> , Value: <tuple>". */
    public static class MyProcess extends KeyedProcessFunction<String, Tuple2<String, Integer>, String> {
        @Override
        public void processElement(Tuple2<String, Integer> in, Context ctx, Collector<String> out) throws Exception {
            out.collect("Key: " + ctx.getCurrentKey() + " , " + "Value: " + in);
        }
    }

    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        DataStreamSource<String> ds = env.readTextFile("D:\\code\\flink111\\src\\main\\resources\\input\\word_count.txt");

        KeyedStream<Tuple2<String, Integer>, String> ks = ds.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String in, Collector<Tuple2<String, Integer>> out) throws Exception {
                // One (word, 1) pair per token.
                for (String word : in.split(" ")) {
                    out.collect(Tuple2.of(word, 1));
                }
            }
        })
        .keyBy(value -> value.f0);

        ks.process(new MyProcess())
                .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
六.sink
6.1.ToKafka
package com.flink.sink;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011;
/**
 * Demo sink: write each line of a local file to the Kafka topic
 * "water_sensor" as a plain string.
 */
public class ToKafka {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<String> lines = env.readTextFile("D:\\code\\flink111\\src\\main\\resources\\input\\water_sensor.txt");

        // FlinkKafkaProducer011(brokerList, topic, serializationSchema)
        lines.addSink(new FlinkKafkaProducer011<String>(
                "hadoop162:9092",
                "water_sensor",
                new SimpleStringSchema()
        ));

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
6.2.ToES
package com.flink.sink;
import com.flink.bean.WaterSensor;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.flink.table.descriptors.Elasticsearch;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;
import java.util.ArrayList;
import java.util.HashMap;
public class ToES {
    /**
     * Reads water-sensor lines from a local text file and writes each raw
     * line into Elasticsearch (index "water_sensor", type "reading") on the
     * hadoop162/163/164 cluster.
     */
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        ArrayList<HttpHost> httpHosts = new ArrayList<>();
        httpHosts.add(new HttpHost("hadoop162", 9200));
        httpHosts.add(new HttpHost("hadoop163", 9200));
        httpHosts.add(new HttpHost("hadoop164", 9200));

        DataStreamSource<String> ds = env.readTextFile("D:\\code\\flink111\\src\\main\\resources\\input\\water_sensor.txt");
        // NOTE(review): the original version also mapped ds into a
        // SingleOutputStreamOperator<WaterSensor> that was never used;
        // that dead code has been removed.

        ElasticsearchSink.Builder<String> esSinkBuilder = new ElasticsearchSink.Builder<>(
                httpHosts,
                new ElasticsearchSinkFunction<String>() {
                    @Override
                    public void process(String element, RuntimeContext ctx, RequestIndexer indexer) {
                        // Wrap the raw CSV line in a map under the "data" field.
                        HashMap<String, String> dataMap = new HashMap<>();
                        dataMap.put("data", element);
                        // Build the index request: index, type and source document.
                        IndexRequest indexRequest = Requests.indexRequest("water_sensor").type("reading").source(dataMap);
                        indexer.add(indexRequest);
                    }
                }
        );
        // The file source is bounded: flush after every record so buffered
        // bulk requests are not lost when the job finishes.
        esSinkBuilder.setBulkFlushMaxActions(1);
        ds.addSink(esSinkBuilder.build());

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
6.3.ToRedis
package com.flink.sink;
import org.apache.commons.math3.fitting.leastsquares.EvaluationRmsChecker;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisClusterConfig;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;
public class ToRedis {
    /**
     * Writes water-sensor records into a Redis hash named "water_sensor":
     * hash field = sensor id concatenated with the timestamp,
     * hash value = the water-level column.
     */
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> source =
                env.readTextFile("D:\\code\\flink111\\src\\main\\resources\\input\\water_sensor.txt");

        FlinkJedisPoolConfig jedisConfig = new FlinkJedisPoolConfig.Builder()
                .setHost("hadoop162")
                .setPort(6379)
                .build();

        RedisMapper<String> mapper = new RedisMapper<String>() {
            // Outermost key: HSET into the "water_sensor" hash.
            @Override
            public RedisCommandDescription getCommandDescription() {
                return new RedisCommandDescription(RedisCommand.HSET, "water_sensor");
            }

            // Hash field: id + timestamp of the CSV record.
            @Override
            public String getKeyFromData(String data) {
                String[] fields = data.split(",");
                return fields[0] + fields[1];
            }

            // Hash value: the third CSV column (water level).
            @Override
            public String getValueFromData(String data) {
                String[] fields = data.split(",");
                return fields[2];
            }
        };

        source.addSink(new RedisSink<String>(jedisConfig, mapper));

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
6.4.CustomizeSink
package com.flink.sink;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
public class CustomizeSink {
    /**
     * Custom JDBC sink demo: inserts each CSV line of the water_sensor file
     * into the PostgreSQL table water_sensor(id, ts, vc).
     */
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> ds = env.readTextFile("D:\\code\\flink111\\src\\main\\resources\\input\\water_sensor.txt");
        ds.addSink(
                new RichSinkFunction<String>() {
                    private Connection conn = null;
                    private PreparedStatement ps = null;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        // One connection and one reusable prepared statement per sink subtask.
                        conn = DriverManager.getConnection("jdbc:postgresql://192.168.1.163:5432/test_db2", "test_user", "aaaaaa");
                        ps = conn.prepareStatement("INSERT INTO water_sensor values(?,?,?)");
                    }

                    @Override
                    public void invoke(String value, Context ctx) throws Exception {
                        // Expected line format: id,ts,vc
                        String[] line = value.split(",");
                        ps.setString(1, line[0]);
                        // parseLong/parseInt return primitives directly — no boxing
                        // round-trip as with Long.valueOf/Integer.valueOf.
                        ps.setLong(2, Long.parseLong(line[1]));
                        ps.setInt(3, Integer.parseInt(line[2]));
                        // executeUpdate is the idiomatic call for INSERT statements.
                        ps.executeUpdate();
                    }

                    @Override
                    public void close() throws Exception {
                        // Null-safe: open() may have thrown before creating ps/conn,
                        // and close() must not mask that failure with an NPE.
                        if (ps != null) {
                            ps.close();
                        }
                        if (conn != null) {
                            conn.close();
                        }
                    }
                }
        );
        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
七.process_function(待完善)
Flink提供了8个Process Function:
ProcessFunction
KeyedProcessFunction
CoProcessFunction
ProcessJoinFunction
BroadcastProcessFunction
KeyedBroadcastProcessFunction
ProcessWindowFunction
ProcessAllWindowFunction
// Via the process function's Context you can access window information, watermark information, the current key, and more.
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.flink</groupId>
<artifactId>flink1.11</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<flink.version>1.11.2</flink.version>
<java.version>1.8</java.version>
<scala.binary.version>2.11</scala.binary.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
<log4j.version>2.12.1</log4j.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/log4j/log4j -->
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-shaded-hadoop-2-uber -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-shaded-hadoop-2-uber</artifactId>
<version>2.7.5-7.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.11_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.postgresql/postgresql -->
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>42.2.5</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-jdbc_2.11</artifactId>
<version>${flink.version}</version>
<!-- <classifier>sources</classifier>-->
<!-- <type>java-source</type>-->
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-common</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.bahir/flink-connector-redis -->
<dependency>
<groupId>org.apache.bahir</groupId>
<artifactId>flink-connector-redis_${scala.binary.version}</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-elasticsearch6_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Duplicate org.postgresql:postgresql 42.2.5 dependency removed;
     it is already declared earlier in this <dependencies> section. -->
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.0.0</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
bean.WaterSensor
WaterSensor
package com.flink.bean;
// Sample POJO: water-level sensor, used to receive height readings.
// id: sensor identifier
// ts: timestamp
// vc: water-level (height) reading
public class WaterSensor {
    // Sensor identifier, e.g. "sensor_1".
    private String id;
    // Event timestamp.
    private Long ts;
    // Water-level reading.
    private Integer vc;

    /** No-arg constructor required for Flink POJO serialization. */
    public WaterSensor() {
    }

    public WaterSensor(String id, Long ts, Integer vc) {
        this.id = id;
        this.ts = ts;
        this.vc = vc;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public Long getTs() {
        return ts;
    }

    public void setTs(Long ts) {
        this.ts = ts;
    }

    public Integer getVc() {
        return vc;
    }

    public void setVc(Integer vc) {
        this.vc = vc;
    }

    // equals/hashCode added together (a class used as a stream element /
    // potential map key must honor the equals-hashCode contract).
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof WaterSensor)) {
            return false;
        }
        WaterSensor that = (WaterSensor) o;
        return java.util.Objects.equals(id, that.id)
                && java.util.Objects.equals(ts, that.ts)
                && java.util.Objects.equals(vc, that.vc);
    }

    @Override
    public int hashCode() {
        return java.util.Objects.hash(id, ts, vc);
    }

    @Override
    public String toString() {
        return "WaterSensor{" +
                "id='" + id + '\'' +
                ", ts=" + ts +
                ", vc=" + vc +
                '}';
    }
}
input
water_sensor.txt
sensor_1,1549044122,1
sensor_1,1549044123,2
sensor_1,1549044124,3
sensor_1,1549044125,4
sensor_2,1549044123,2
sensor_3,1549044124,3
sensor_4,1549044125,4
sensor_5,1549044126,5
sensor_6,1549044127,6
sensor_7,1549044128,7
sensor_8,1549044129,8
sensor_9,1549044130,9
sensor_10,1549044130,10
sensor_11,1549044122,11
sensor_12,1549044123,12
sensor_13,1549044124,13
sensor_14,1549044125,14
sensor_15,1549044126,15
sensor_16,1549044127,16
sensor_17,1549044128,17
sensor_18,1549044129,18
sensor_19,1549044130,19
word_count.txt
a1 a2 b1
b1 b2 c1 c2 a2
c2 b2 b3
b1 b2 c3 c2 a1
c2 b2 b3
b1 b1 c3 c2 a1
c2 b1 b2