# Flink Stream Processing API Usage
### I. Source
##### 1. Reading data from a Kafka message queue
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import java.util.Properties;
public class Flink03_Source_Kafka {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Read data from Kafka
Properties properties = new Properties();
properties.setProperty("bootstrap.servers", "hadoop102:9092");
properties.setProperty("group.id", "consumer-group");
properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.setProperty("auto.offset.reset", "latest");
DataStreamSource<String> kafkaDS = env.addSource(new FlinkKafkaConsumer011<String>("test",
new SimpleStringSchema(),
properties));
//3. Print
kafkaDS.print();
//4. Execute the job
env.execute("Flink03_Source_Kafka");
}
}
##### 2. Reading data from a file
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
public class Flink02_Source_File {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Read data from a file
DataStreamSource<String> sensorDS = env.readTextFile("sensor");
//3. Print
sensorDS.print();
//4. Execute the job
env.execute("Flink02_Source_File");
}
}
##### 3. Reading data from a collection
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.util.Arrays;
public class Flink01_Source_Collection {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Create a stream from a collection
DataStreamSource<SensorReading> sensorDS = env.fromCollection(Arrays.asList(
new SensorReading("sensor_1", 1547718199L, 35.8),
new SensorReading("sensor_6", 1547718201L, 15.4),
new SensorReading("sensor_7", 1547718202L, 6.7),
new SensorReading("sensor_10", 1547718205L, 38.1)
));
//3. Print
sensorDS.print();
//4. Execute the job
env.execute("Flink01_Source_Collection");
}
}
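All of the examples in this section construct and consume a `SensorReading` JavaBean that these notes never define. A minimal sketch of it, assuming the field names implied by the constructors, `keyBy("id")`, and the `getId()`/`getTs()`/`getTemp()` calls used later:

```java
// Hypothetical reconstruction of the SensorReading POJO assumed by these examples
public class SensorReading {
    private String id;   // sensor id, e.g. "sensor_1"
    private Long ts;     // timestamp in seconds
    private Double temp; // temperature reading

    // A public no-arg constructor and getters/setters make this a valid Flink POJO
    public SensorReading() {
    }

    public SensorReading(String id, Long ts, Double temp) {
        this.id = id;
        this.ts = ts;
        this.temp = temp;
    }

    public String getId() { return id; }
    public void setId(String id) { this.id = id; }
    public Long getTs() { return ts; }
    public void setTs(Long ts) { this.ts = ts; }
    public Double getTemp() { return temp; }
    public void setTemp(Double temp) { this.temp = temp; }

    @Override
    public String toString() {
        return "SensorReading{id='" + id + "', ts=" + ts + ", temp=" + temp + "}";
    }
}
```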
### II. Transform
##### 1. map
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
public class Flink05_Transform_Map {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Read data from a file
DataStreamSource<String> fileDS = env.readTextFile("sensor");
//3. Convert each line into a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = fileDS.map(new MapFunction<String, SensorReading>() {
@Override
public SensorReading map(String value) throws Exception {
String[] fields = value.split(",");
return new SensorReading(fields[0],
Long.parseLong(fields[1]),
Double.parseDouble(fields[2]));
}
});
//4. Print
sensorDS.print();
//5. Execute
env.execute("Flink05_Transform_Map");
}
}
##### 2. flatMap
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
public class Flink06_Transform_FlatMap {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Read data from a file
DataStreamSource<String> fileDS = env.readTextFile("sensor");
//3. Flatten the data
SingleOutputStreamOperator<String> flatMapDS = fileDS.flatMap(new FlatMapFunction<String, String>() {
@Override
public void flatMap(String value, Collector<String> out) throws Exception {
String[] split = value.split(",");
for (String s : split) {
out.collect(s);
}
}
});
//4. Print
flatMapDS.print();
//5. Execute
env.execute("Flink06_Transform_FlatMap");
}
}
##### 3. Filter
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
public class Flink07_Transform_Filter {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Read data from a file
DataStreamSource<String> fileDS = env.readTextFile("sensor");
//3. Filter: keep readings above 30 degrees
SingleOutputStreamOperator<String> filter = fileDS.filter(new FilterFunction<String>() {
@Override
public boolean filter(String value) throws Exception {
double temp = Double.parseDouble(value.split(",")[2]);
return temp > 30.0D;
}
});
filter.print();
env.execute();
}
}
##### 4. max / maxBy
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
public class Flink08_Transform_Max {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Read data from a file
DataStreamSource<String> fileDS = env.readTextFile("sensor");
//3. Convert each line into a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = fileDS.map(new MapFunction<String, SensorReading>() {
@Override
public SensorReading map(String value) throws Exception {
String[] fields = value.split(",");
return new SensorReading(fields[0],
Long.parseLong(fields[1]),
Double.parseDouble(fields[2]));
}
});
//4. Key by sensor id
KeyedStream<SensorReading, Tuple> keyedStream = sensorDS.keyBy("id");
//5. Compute the max temperature per sensor
// max: only the aggregated field is updated; the other fields keep the values of the first record seen
SingleOutputStreamOperator<SensorReading> maxResult = keyedStream.max("temp");
// maxBy: returns the complete record that contains the maximum value
SingleOutputStreamOperator<SensorReading> maxByResult = keyedStream.maxBy("temp");
//6. Print the results
maxResult.print("max");
maxByResult.print("maxBy");
//7. Execute
env.execute();
}
}
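`keyBy("id")` and `max("temp")` use the older string-based field references, which later Flink releases deprecate in favor of type-safe `KeySelector`s. A sketch of the same grouping with a `KeySelector`, assuming the `SensorReading` bean above:

```java
// Type-safe alternative to keyBy("id"); requires
// import org.apache.flink.api.java.functions.KeySelector;
KeyedStream<SensorReading, String> keyedById = sensorDS.keyBy(new KeySelector<SensorReading, String>() {
    @Override
    public String getKey(SensorReading value) {
        return value.getId();
    }
});
// or simply: sensorDS.keyBy(SensorReading::getId)
```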
##### 5. Reduce
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
public class Flink09_Transform_Reduce {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Read data from a file
DataStreamSource<String> fileDS = env.readTextFile("sensor");
//3. Convert each line into a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = fileDS.map(new MapFunction<String, SensorReading>() {
@Override
public SensorReading map(String value) throws Exception {
String[] fields = value.split(",");
return new SensorReading(fields[0],
Long.parseLong(fields[1]),
Double.parseDouble(fields[2]));
}
});
//4. Key by sensor id
KeyedStream<SensorReading, Tuple> keyedStream = sensorDS.keyBy("id");
//5. Compute each sensor's max temperature together with its latest timestamp
SingleOutputStreamOperator<SensorReading> reduce = keyedStream.reduce(new ReduceFunction<SensorReading>() {
@Override
public SensorReading reduce(SensorReading value1, SensorReading value2) throws Exception {
return new SensorReading(value1.getId(),
value2.getTs(),
Math.max(value1.getTemp(), value2.getTemp()));
}
});
//6. Print
reduce.print();
//7. Execute
env.execute();
}
}
##### 6. Split and Select
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.SplitStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.util.Arrays;
import java.util.Collections;
public class Flink10_Transform_Split {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Read data from a file
DataStreamSource<String> fileDS = env.readTextFile("sensor");
//3. Convert each line into a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = fileDS.map(new MapFunction<String, SensorReading>() {
@Override
public SensorReading map(String value) throws Exception {
String[] fields = value.split(",");
return new SensorReading(fields[0],
Long.parseLong(fields[1]),
Double.parseDouble(fields[2]));
}
});
//4. Split the stream by temperature
SplitStream<SensorReading> split = sensorDS.split(new OutputSelector<SensorReading>() {
@Override
public Iterable<String> select(SensorReading value) {
return value.getTemp() > 30 ?
Collections.singletonList("high") : Collections.singletonList("low");
}
});
//5. Select streams
DataStream<SensorReading> high = split.select("high");
DataStream<SensorReading> low = split.select("low");
DataStream<SensorReading> all = split.select("high","low");
//6. Print
high.print("high");
low.print("low");
all.print("all");
//7. Execute
env.execute();
}
}
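Note that `split`/`select` was deprecated and later removed from Flink; side outputs are the replacement. A sketch of the same high/low split with a `ProcessFunction` and an `OutputTag` (names here are illustrative):

```java
// Side-output replacement for split/select; requires
// import org.apache.flink.streaming.api.functions.ProcessFunction;
// import org.apache.flink.util.Collector;
// import org.apache.flink.util.OutputTag;
final OutputTag<SensorReading> lowTag = new OutputTag<SensorReading>("low") {};

SingleOutputStreamOperator<SensorReading> highStream = sensorDS
        .process(new ProcessFunction<SensorReading, SensorReading>() {
            @Override
            public void processElement(SensorReading value, Context ctx, Collector<SensorReading> out) {
                if (value.getTemp() > 30) {
                    out.collect(value);        // main output: high temperatures
                } else {
                    ctx.output(lowTag, value); // side output: low temperatures
                }
            }
        });

DataStream<SensorReading> lowStream = highStream.getSideOutput(lowTag);
```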
##### 7. Connect and CoMap
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.*;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoMapFunction;
import java.util.Collections;
public class Flink11_Transform_Connect {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Read data from a file
DataStreamSource<String> fileDS = env.readTextFile("sensor");
//3. Convert each line into a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = fileDS.map(new MapFunction<String, SensorReading>() {
@Override
public SensorReading map(String value) throws Exception {
String[] fields = value.split(",");
return new SensorReading(fields[0],
Long.parseLong(fields[1]),
Double.parseDouble(fields[2]));
}
});
//4. Split the stream by temperature
SplitStream<SensorReading> split = sensorDS.split(new OutputSelector<SensorReading>() {
@Override
public Iterable<String> select(SensorReading value) {
return value.getTemp() > 30 ?
Collections.singletonList("high") : Collections.singletonList("low");
}
});
//5. Select streams
SingleOutputStreamOperator<Tuple2<String, Double>> high = split.select("high").map(new MapFunction<SensorReading, Tuple2<String, Double>>() {
@Override
public Tuple2<String, Double> map(SensorReading value) throws Exception {
return new Tuple2<>(value.getId(), value.getTemp());
}
});
DataStream<SensorReading> low = split.select("low");
//6. Connect the two streams
ConnectedStreams<Tuple2<String, Double>, SensorReading> connect = high.connect(low);
//7. Merge the two streams with a CoMapFunction
SingleOutputStreamOperator<Object> map = connect.map(new CoMapFunction<Tuple2<String, Double>, SensorReading, Object>() {
@Override
public Object map1(Tuple2<String, Double> value) throws Exception {
return new Tuple3<String, Double, String>(value.f0, value.f1, "warn");
}
@Override
public Object map2(SensorReading value) throws Exception {
return new Tuple2<String, String>(value.getId(), "healthy");
}
});
//8. Print
map.print();
//9. Execute
env.execute();
}
}
##### 8. Union
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.SplitStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.util.Collections;
public class Flink12_Transform_Union {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Read data from a file
DataStreamSource<String> fileDS = env.readTextFile("sensor");
//3. Convert each line into a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = fileDS.map(new MapFunction<String, SensorReading>() {
@Override
public SensorReading map(String value) throws Exception {
String[] fields = value.split(",");
return new SensorReading(fields[0],
Long.parseLong(fields[1]),
Double.parseDouble(fields[2]));
}
});
//4. Split the stream by temperature
SplitStream<SensorReading> split = sensorDS.split(new OutputSelector<SensorReading>() {
@Override
public Iterable<String> select(SensorReading value) {
return value.getTemp() > 30 ?
Collections.singletonList("high") : Collections.singletonList("low");
}
});
//5. Select streams
DataStream<SensorReading> high = split.select("high");
DataStream<SensorReading> low = split.select("low");
//6. Union the two streams (union requires the same element type, unlike connect)
DataStream<SensorReading> all = high.union(low);
//7. Print
all.print();
//8. Execute
env.execute();
}
}
##### 9. RichMap (rich function)
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
public class Flink13_Transform_RichMap {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Read data from a file
DataStreamSource<String> fileDS = env.readTextFile("sensor");
//3. Convert each line into a JavaBean
SingleOutputStreamOperator<SensorReading> map = fileDS.map(new MyRichMapFunc());
//4. Print
map.print();
//5. Execute
env.execute();
}
public static class MyRichMapFunc extends RichMapFunction<String, SensorReading> {
@Override
public void open(Configuration parameters) throws Exception {
// Open connections here; open() is called once per parallel instance
super.open(parameters);
System.out.println("open() was called");
}
@Override
public SensorReading map(String value) throws Exception {
// Use the connection for per-record work
String[] fields = value.split(",");
return new SensorReading(fields[0],
Long.parseLong(fields[1]),
Double.parseDouble(fields[2]));
}
@Override
public void close() throws Exception {
// Close connections here
super.close();
System.out.println("close() was called");
}
}
}
### III. Transform Exercises
##### 1. Implementing streaming WordCount with reduce
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
public class Test01_WordCount_Reduce {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//2. Create a stream from a socket port
DataStreamSource<String> textStream = env.socketTextStream("hadoop102", 7777);
//3. Flatten
SingleOutputStreamOperator<Tuple2<String, Integer>> wordDS = textStream.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
@Override
public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
String[] words = value.split(" ");
for (String word : words) {
out.collect(new Tuple2<>(word, 1));
}
}
});
//4. Key by word
KeyedStream<Tuple2<String, Integer>, Tuple> keyedStream = wordDS.keyBy(0);
//5. Implement WordCount with reduce
SingleOutputStreamOperator<Tuple2<String, Integer>> result = keyedStream.reduce(new ReduceFunction<Tuple2<String, Integer>>() {
@Override
public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception {
return new Tuple2<>(value1.f0, value1.f1 + value2.f1);
}
});
//6. Print the result
result.print();
//7. Execute the job
env.execute();
}
}
##### 2. Reading sensor temperatures from Kafka and splitting them into high- and low-temperature streams (30° threshold)
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SplitStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import java.util.Collections;
import java.util.Properties;
public class Test02_KafkaSource_Split {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//2. Create a stream from Kafka
Properties properties = new Properties();
properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop102:9092");
properties.put(ConsumerConfig.GROUP_ID_CONFIG, "test");
properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
DataStreamSource<String> kafkaDS = env.addSource(new FlinkKafkaConsumer011<String>("test", new SimpleStringSchema(), properties));
//3. Split the stream
SplitStream<String> split = kafkaDS.split(new OutputSelector<String>() {
@Override
public Iterable<String> select(String value) {
// Extract the temperature
double temp = Double.parseDouble(value.split(",")[2]);
return temp > 30.0 ? Collections.singletonList("high") : Collections.singletonList("low");
}
});
//4. Select streams
split.select("high").print("high");
split.select("low").print("low");
//5. Execute the job
env.execute();
}
}
##### 3. Reading text data, splitting each line into words, and emitting de-duplicated words
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.RichFilterFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
import redis.clients.jedis.Jedis;
public class Test03_Distinct {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Create a stream from a file
DataStreamSource<String> inputDS = env.readTextFile("input");
//3. Flatten
SingleOutputStreamOperator<String> wordDS = inputDS.flatMap(new FlatMapFunction<String, String>() {
@Override
public void flatMap(String value, Collector<String> out) throws Exception {
String[] words = value.split(" ");
for (String word : words) {
out.collect(word);
}
}
});
//4. Filter
SingleOutputStreamOperator<String> filter = wordDS.filter(new RichFilterFunction<String>() {
// Redis connection (initialized in open())
Jedis jedis = null;
// Redis set key used for de-duplication
String redisKey = "distinct";
@Override
public void open(Configuration parameters) throws Exception {
jedis = new Jedis("hadoop102", 6379);
}
@Override
public boolean filter(String value) throws Exception {
// Check whether the word already exists in Redis
Boolean exist = jedis.sismember(redisKey, value);
// Record the word in Redis if it is new
if (!exist) {
jedis.sadd(redisKey, value);
}
// Keep the word only if it has not been seen before
return !exist;
}
@Override
public void close() throws Exception {
jedis.close();
}
});
//5. Print
filter.print();
//6. Execute the job
env.execute();
}
}
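The Redis set above grows without bound and lives outside Flink's fault tolerance. An alternative sketch that keeps the seen-set in Flink keyed state instead, so it is checkpointed with the job:

```java
// De-duplication with keyed state; requires
// import org.apache.flink.api.common.state.ValueState;
// import org.apache.flink.api.common.state.ValueStateDescriptor;
SingleOutputStreamOperator<String> distinct = wordDS
        .keyBy(word -> word)
        .filter(new RichFilterFunction<String>() {
            private transient ValueState<Boolean> seen;

            @Override
            public void open(Configuration parameters) {
                seen = getRuntimeContext().getState(new ValueStateDescriptor<>("seen", Boolean.class));
            }

            @Override
            public boolean filter(String value) throws Exception {
                if (seen.value() == null) {
                    seen.update(true); // first occurrence of this word
                    return true;
                }
                return false;          // word already emitted
            }
        });
```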
### IV. Sink
##### 1. Sink_Kafka
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011;
public class Flink01_Sink_Kafka {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Create a stream from a file
DataStreamSource<String> inputDS = env.readTextFile("sensor");
//3. Write the data to Kafka
inputDS.addSink(new FlinkKafkaProducer011<String>("hadoop102:9092", "test", new SimpleStringSchema()));
//4. Execute the job
env.execute("Flink01_Sink_Kafka");
}
}
##### 2. Sink_Redis
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;
public class Flink02_Sink_Redis {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Create a stream from a file
DataStreamSource<String> inputDS = env.readTextFile("sensor");
//3. Write the data to Redis
FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder()
.setHost("hadoop102")
.setPort(6379)
.build();
inputDS.addSink(new RedisSink<>(config, new MyRedisMapper()));
//4. Execute the job
env.execute();
}
public static class MyRedisMapper implements RedisMapper<String> {
@Override
public RedisCommandDescription getCommandDescription() {
// HSET into the Redis hash "sensor": field = sensor id, value = temperature
return new RedisCommandDescription(RedisCommand.HSET, "sensor");
}
@Override
public String getKeyFromData(String data) {
String[] fields = data.split(",");
return fields[0];
}
@Override
public String getValueFromData(String data) {
String[] fields = data.split(",");
return fields[2];
}
}
}
##### 3. Sink_ES
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;
import java.util.ArrayList;
import java.util.HashMap;
public class Flink03_Sink_ES {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Create a stream from a file
DataStreamSource<String> inputDS = env.readTextFile("sensor");
//3. Write the data to Elasticsearch
//3.1 Hosts of the ES cluster
ArrayList<HttpHost> httpHosts = new ArrayList<>();
httpHosts.add(new HttpHost("hadoop102", 9200));
//3.2 Build the ElasticsearchSink
ElasticsearchSink<String> elasticsearchSink =
new ElasticsearchSink.Builder<>(httpHosts, new MyEsSinkFunc())
.build();
//3.3 Attach the sink
inputDS.addSink(elasticsearchSink);
//4. Execute the job
env.execute();
}
public static class MyEsSinkFunc implements ElasticsearchSinkFunction<String> {
@Override
public void process(String element, RuntimeContext ctx, RequestIndexer indexer) {
// Split the element
String[] fields = element.split(",");
// Build the document to be written to ES
HashMap<String, String> source = new HashMap<>();
source.put("id", fields[0]);
source.put("ts", fields[1]);
source.put("temp", fields[2]);
// Create the IndexRequest
IndexRequest indexRequest = Requests.indexRequest()
.index("sensor")
.type("_doc")
// .id(fields[0])
.source(source);
// Hand the request to the indexer
indexer.add(indexRequest);
}
}
}
##### 4. Sink_JDBC
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
public class Flink04_Sink_JDBC {
public static void main(String[] args) throws Exception {
//1. Get the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Create a stream from a file
DataStreamSource<String> inputDS = env.readTextFile("sensor");
//3. Write the data to MySQL
inputDS.addSink(new JdbcSink());
//4. Execute the job
env.execute();
}
public static class JdbcSink extends RichSinkFunction<String> {
// JDBC connection objects
Connection connection = null;
PreparedStatement preparedStatement = null;
@Override
public void open(Configuration parameters) throws Exception {
connection = DriverManager.getConnection("jdbc:mysql://hadoop102:3306/test", "root", "000000");
preparedStatement = connection.prepareStatement("INSERT INTO sensor(id,temp) VALUES(?,?) ON DUPLICATE KEY UPDATE temp=?");
}
@Override
public void invoke(String value, Context context) throws Exception {
// Split the record
String[] fields = value.split(",");
// Bind the parameters of the prepared statement
preparedStatement.setString(1, fields[0]);
preparedStatement.setDouble(2, Double.parseDouble(fields[2]));
preparedStatement.setDouble(3, Double.parseDouble(fields[2]));
// Execute the statement
preparedStatement.execute();
}
@Override
public void close() throws Exception {
preparedStatement.close();
connection.close();
}
}
}
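Flink 1.11+ ships an official JDBC connector whose `JdbcSink` handles batching and the connection lifecycle, so the hand-rolled `RichSinkFunction` above is mainly needed on older versions. A sketch of the same upsert with it, assuming the `flink-connector-jdbc` dependency:

```java
// Sketch using the official JDBC sink (Flink 1.11+); requires
// import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
// import org.apache.flink.connector.jdbc.JdbcSink;
inputDS.addSink(JdbcSink.sink(
        "INSERT INTO sensor(id, temp) VALUES(?, ?) ON DUPLICATE KEY UPDATE temp = ?",
        (preparedStatement, value) -> {
            String[] fields = value.split(",");
            preparedStatement.setString(1, fields[0]);
            preparedStatement.setDouble(2, Double.parseDouble(fields[2]));
            preparedStatement.setDouble(3, Double.parseDouble(fields[2]));
        },
        new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                .withUrl("jdbc:mysql://hadoop102:3306/test")
                .withDriverName("com.mysql.jdbc.Driver")
                .withUsername("root")
                .withPassword("000000")
                .build()));
```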
### V. Window
##### 1. Window_TumplingTime (tumbling time window)
import com.atguigu.day01.Flink01_WordCount_Batch;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.WindowedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
public class Flink05_Window_TumplingTime {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//2. Read data from a socket port
//DataStreamSource<String> input = env.readTextFile("input");
DataStreamSource<String> input = env.socketTextStream("hadoop102", 7777);
//3. Flatten
SingleOutputStreamOperator<Tuple2<String, Integer>> wordToOneDS = input.flatMap(new Flink01_WordCount_Batch.MyFlatMapFunc());
//4. Key by word
KeyedStream<Tuple2<String, Integer>, Tuple> keyedStream = wordToOneDS.keyBy(0);
//5. Tumbling processing-time window (shorthand API)
WindowedStream<Tuple2<String, Integer>, Tuple, TimeWindow> windowDStream = keyedStream.timeWindow(Time.seconds(5));
//6. Aggregate
SingleOutputStreamOperator<Tuple2<String, Integer>> sum = windowDStream.sum(1);
//7. Print
sum.print();
//8. Execute the job
env.execute();
}
}
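All of the window examples reuse `Flink01_WordCount_Batch.MyFlatMapFunc` from an earlier day's code, which is not shown in these notes. A minimal sketch of what it presumably looks like (split on spaces, emit `(word, 1)` pairs; requires the `FlatMapFunction`, `Tuple2`, and `Collector` imports):

```java
// Hypothetical reconstruction of Flink01_WordCount_Batch.MyFlatMapFunc
public static class MyFlatMapFunc implements FlatMapFunction<String, Tuple2<String, Integer>> {
    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
        for (String word : value.split(" ")) {
            out.collect(new Tuple2<>(word, 1));
        }
    }
}
```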
##### 2. Window_SlidingTime (sliding time window)
import com.atguigu.day01.Flink01_WordCount_Batch;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.WindowedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
public class Flink06_Window_SlidingTime {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//2. Read data from a socket port
//DataStreamSource<String> input = env.readTextFile("input");
DataStreamSource<String> input = env.socketTextStream("hadoop102", 7777);
//3. Flatten
SingleOutputStreamOperator<Tuple2<String, Integer>> wordToOneDS = input.flatMap(new Flink01_WordCount_Batch.MyFlatMapFunc());
//4. Key by word
KeyedStream<Tuple2<String, Integer>, Tuple> keyedStream = wordToOneDS.keyBy(0);
//5. Sliding processing-time window (shorthand API)
WindowedStream<Tuple2<String, Integer>, Tuple, TimeWindow> windowDStream = keyedStream.timeWindow(Time.seconds(6), Time.seconds(2));
//6. Aggregate
SingleOutputStreamOperator<Tuple2<String, Integer>> sum = windowDStream.sum(1);
//7. Print
sum.print();
//8. Execute the job
env.execute();
}
}
##### 3. Window_Offset (tumbling window with an offset)
import com.atguigu.day01.Flink01_WordCount_Batch;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.WindowedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
public class Flink07_Window_Offset {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//2. Read data from a socket port
//DataStreamSource<String> input = env.readTextFile("input");
DataStreamSource<String> input = env.socketTextStream("hadoop102", 7777);
//3. Flatten
SingleOutputStreamOperator<Tuple2<String, Integer>> wordToOneDS = input.flatMap(new Flink01_WordCount_Batch.MyFlatMapFunc());
//4. Key by word
KeyedStream<Tuple2<String, Integer>, Tuple> keyedStream = wordToOneDS.keyBy(0);
//5. Tumbling window of 5 s with a 1 s offset (the offset shifts the window boundaries)
WindowedStream<Tuple2<String, Integer>, Tuple, TimeWindow> window = keyedStream.window(TumblingProcessingTimeWindows.of(Time.seconds(5), Time.seconds(1)));
window.sum(1).print();
env.execute();
}
}
##### 4. Window_SessionTime (session window)
import com.atguigu.day01.Flink01_WordCount_Batch;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.WindowedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.ProcessingTimeSessionWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
public class Flink08_Window_SessionTime {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//2. Read data from a socket port
//DataStreamSource<String> input = env.readTextFile("input");
DataStreamSource<String> input = env.socketTextStream("hadoop102", 7777);
//3. Flatten
SingleOutputStreamOperator<Tuple2<String, Integer>> wordToOneDS = input.flatMap(new Flink01_WordCount_Batch.MyFlatMapFunc());
//4. Key by word
KeyedStream<Tuple2<String, Integer>, Tuple> keyedStream = wordToOneDS.keyBy(0);
//5. Processing-time session window with a 5 s gap
WindowedStream<Tuple2<String, Integer>, Tuple, TimeWindow> window = keyedStream.window(ProcessingTimeSessionWindows.withGap(Time.seconds(5)));
//6. Aggregate
SingleOutputStreamOperator<Tuple2<String, Integer>> sum = window.sum(1);
//7. Print
sum.print();
//8. Execute the job
env.execute();
}
}
##### 5. TumplingCount (count-based tumbling window)
import com.atguigu.day01.Flink01_WordCount_Batch;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.WindowedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow;
public class Flink09_TumplingCount {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//2. Read data from a socket port
//DataStreamSource<String> input = env.readTextFile("input");
DataStreamSource<String> input = env.socketTextStream("hadoop102", 7777);
//3. Flatten
SingleOutputStreamOperator<Tuple2<String, Integer>> wordToOneDS = input.flatMap(new Flink01_WordCount_Batch.MyFlatMapFunc());
//4. Key by word
KeyedStream<Tuple2<String, Integer>, Tuple> keyedStream = wordToOneDS.keyBy(0);
//5. Tumbling count window: fires every 5 elements per key
WindowedStream<Tuple2<String, Integer>, Tuple, GlobalWindow> window = keyedStream.countWindow(5);
//6. Aggregate
SingleOutputStreamOperator<Tuple2<String, Integer>> sum = window.sum(1);
//7. Print
sum.print();
//8. Execute the job
env.execute();
}
}
##### 6. SlidingCount (count-based sliding window)
import com.atguigu.day01.Flink01_WordCount_Batch;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.WindowedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow;
public class Flink10_SlidingCount {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//2. Read data from a socket port
//DataStreamSource<String> input = env.readTextFile("input");
DataStreamSource<String> input = env.socketTextStream("hadoop102", 7777);
//3. Flatten
SingleOutputStreamOperator<Tuple2<String, Integer>> wordToOneDS = input.flatMap(new Flink01_WordCount_Batch.MyFlatMapFunc());
//4. Key by word
KeyedStream<Tuple2<String, Integer>, Tuple> keyedStream = wordToOneDS.keyBy(0);
//5. Sliding count window: size 5, slide 2
WindowedStream<Tuple2<String, Integer>, Tuple, GlobalWindow> window = keyedStream.countWindow(5, 2);
//6. Aggregate
SingleOutputStreamOperator<Tuple2<String, Integer>> sum = window.sum(1);
//7. Print
sum.print();
//8. Execute the job
env.execute();
}
}
##### 7. Window_Apply (full window function)
import com.atguigu.day01.Flink01_WordCount_Batch;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.WindowedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import java.util.Iterator;
public class Flink11_Window_Apply {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//2. Read data from a socket port
//DataStreamSource<String> input = env.readTextFile("input");
DataStreamSource<String> input = env.socketTextStream("hadoop102", 7777);
//3. Flatten
SingleOutputStreamOperator<Tuple2<String, Integer>> wordToOneDS = input.flatMap(new Flink01_WordCount_Batch.MyFlatMapFunc());
//4. Key by word
KeyedStream<Tuple2<String, Integer>, Tuple> keyedStream = wordToOneDS.keyBy(0);
//5. Tumbling time window
WindowedStream<Tuple2<String, Integer>, Tuple, TimeWindow> windowDStream = keyedStream.timeWindow(Time.seconds(5));
//6. Count each window's elements with a full window function
SingleOutputStreamOperator<Integer> sum = windowDStream.apply(new MyWindowFunc());
//7. Print
sum.print();
//8. Execute the job
env.execute();
}
public static class MyWindowFunc implements WindowFunction<Tuple2<String, Integer>, Integer, Tuple, TimeWindow> {
@Override
public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<String, Integer>> input, Collector<Integer> out) throws Exception {
// Count all elements buffered in this window
Integer count = 0;
Iterator<Tuple2<String, Integer>> iterator = input.iterator();
while (iterator.hasNext()) {
iterator.next();
count += 1;
}
out.collect(count);
}
}
}
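`apply` buffers every element of the window before the function runs. For a plain count, an incremental `AggregateFunction` computes the same result without buffering; a sketch:

```java
// Incremental aggregation; requires
// import org.apache.flink.api.common.functions.AggregateFunction;
SingleOutputStreamOperator<Integer> counts = windowDStream
        .aggregate(new AggregateFunction<Tuple2<String, Integer>, Integer, Integer>() {
            @Override
            public Integer createAccumulator() { return 0; }

            @Override
            public Integer add(Tuple2<String, Integer> value, Integer accumulator) { return accumulator + 1; }

            @Override
            public Integer getResult(Integer accumulator) { return accumulator; }

            @Override
            public Integer merge(Integer a, Integer b) { return a + b; }
        });
```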
### VI. Flink SQL
##### 1. Overall flow: FlinkSQL01_Test
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
public class FlinkSQL01_Test {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Create a stream from a text file and map to JavaBeans
SingleOutputStreamOperator<SensorReading> sensorDS = env.readTextFile("sensor")
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Create the Table API / Flink SQL environment
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//4. Register the stream as a table
tableEnv.createTemporaryView("sensor", sensorDS);
//5. Table API
Table table = tableEnv.from("sensor");
Table tableResult = table.select("id,temp").where("id='sensor_1'");
//6.SQL
Table sqlResult = tableEnv.sqlQuery("select id,temp from sensor where id ='sensor_1'");
//7. Print the results
tableEnv.toAppendStream(tableResult, Row.class).print("tableResult");
tableEnv.toAppendStream(sqlResult, Row.class).print("sqlResult");
//8. Execute
env.execute();
}
}
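String expressions such as `select("id,temp")` belong to the old Table API and were deprecated in Flink 1.11 in favor of the expression DSL. A sketch of the same query in the newer style:

```java
// Expression DSL (Flink 1.11+); requires
// import static org.apache.flink.table.api.Expressions.$;
Table tableResult = table
        .select($("id"), $("temp"))
        .where($("id").isEqual("sensor_1"));
```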
##### 2. Env: environments for the old and new (Blink) planners
package com.atguigu.day06;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.java.BatchTableEnvironment;
import org.apache.flink.table.api.java.StreamTableEnvironment;
public class FlinkSQL02_Env {
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//1. Streaming environment with the old planner
EnvironmentSettings settings = EnvironmentSettings.newInstance()
.useOldPlanner() // use the old planner
.inStreamingMode() // streaming mode
.build();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);
//2. Batch environment with the old planner
ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();
BatchTableEnvironment batchTableEnv = BatchTableEnvironment.create(batchEnv);
//3. Streaming environment with the new (Blink) planner
EnvironmentSettings bsSettings = EnvironmentSettings.newInstance()
.useBlinkPlanner()
.inStreamingMode()
.build();
StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(env, bsSettings);
//4. Batch environment with the new (Blink) planner
EnvironmentSettings bbSettings = EnvironmentSettings.newInstance()
.useBlinkPlanner()
.inBatchMode()
.build();
TableEnvironment bbTableEnv = TableEnvironment.create(bbSettings);
}
}
##### 3. Source_File: reading a file source through a connector (rarely used in practice)
package com.atguigu.day06;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.FileSystem;
import org.apache.flink.table.descriptors.OldCsv;
import org.apache.flink.table.descriptors.Schema;
import org.apache.flink.types.Row;
public class FlinkSQL03_Source_File {
public static void main(String[] args) throws Exception {
//1. Create the environments
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Define the filesystem connector
tableEnv.connect(new FileSystem().path("sensor"))
.withFormat(new OldCsv())
.withSchema(new Schema()
.field("id", DataTypes.STRING())
.field("ts", DataTypes.BIGINT())
.field("temp", DataTypes.DOUBLE()))
.createTemporaryTable("fileInput");
//3. Create a Table from the registered source
Table table = tableEnv.from("fileInput");
//4. Table API
Table tableResult = table.where("id='sensor_1'").select("id,temp");
//5. SQL
Table sqlResult = tableEnv.sqlQuery("select id,temp from fileInput where id ='sensor_1'");
//6. Print the results
tableEnv.toAppendStream(tableResult, Row.class).print("tableResult");
tableEnv.toAppendStream(sqlResult, Row.class).print("sqlResult");
//7. Execute
env.execute();
}
}
##### 4. Source_Kafka: reading a Kafka source through a connector (rarely used)
package com.atguigu.day06;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.Csv;
import org.apache.flink.table.descriptors.Json;
import org.apache.flink.table.descriptors.Kafka;
import org.apache.flink.table.descriptors.Schema;
import org.apache.flink.types.Row;
import org.apache.kafka.clients.consumer.ConsumerConfig;
public class FlinkSQL04_Source_Kafka {
public static void main(String[] args) throws Exception {
//1. Create the environments
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Define the Kafka connector
tableEnv.connect(new Kafka()
.topic("test")
.version("0.11")
.property(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop102:9092")
.property(ConsumerConfig.GROUP_ID_CONFIG, "testKafkaSource"))
//.withFormat(new Csv())
.withFormat(new Json())
.withSchema(new Schema()
.field("id", DataTypes.STRING())
.field("ts", DataTypes.BIGINT())
.field("temp", DataTypes.DOUBLE()))
.createTemporaryTable("kafkaInput");
//3. Create a Table from the registered source
Table table = tableEnv.from("kafkaInput");
//4. Table API
Table tableResult = table.where("id='sensor_1'").select("id,temp");
//5. SQL
Table sqlResult = tableEnv.sqlQuery("select id,temp from kafkaInput where id ='sensor_1'");
//6. Print the results
tableEnv.toAppendStream(tableResult, Row.class).print("tableResult");
tableEnv.toAppendStream(sqlResult, Row.class).print("sqlResult");
//7. Execute
env.execute();
}
}
##### 5. Converting a stream to a table and running an aggregation query
package com.atguigu.day06;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
public class FlinkSQL05_Agg {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2. Create a stream from a text file and map to JavaBeans
SingleOutputStreamOperator<SensorReading> sensorDS = env.readTextFile("sensor/sensor.txt")
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Create the Table API / Flink SQL environment
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//4. Register the stream as a table
tableEnv.createTemporaryView("sensor", sensorDS);
//5. Table API
Table table = tableEnv.from("sensor");
Table tableResult = table.groupBy("id").select("id,id.count,temp.avg");
//6.SQL
Table sqlResult = tableEnv.sqlQuery("select id,count(id) from sensor group by id");
//7. Print the results as retract streams, since the aggregation updates earlier results
tableEnv.toRetractStream(tableResult, Row.class).print("tableResult");
tableEnv.toRetractStream(sqlResult, Row.class).print("sqlResult");
//8. Execute
env.execute();
}
}
##### 6. Sink_File: writing results to a file
package com.atguigu.day06;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.FileSystem;
import org.apache.flink.table.descriptors.OldCsv;
import org.apache.flink.table.descriptors.Schema;
public class FlinkSQL06_Sink_File {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Create a stream from a socket and map to JavaBeans
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream into a Table
Table table = tableEnv.fromDataStream(sensorDS);
//4. Table API
Table tableResult = table.groupBy("id").select("id,id.count as ct");
//5. SQL
tableEnv.createTemporaryView("socket", sensorDS);
Table sqlResult = tableEnv.sqlQuery("select id,temp from socket where id = 'sensor_1'");
//6. Write the data to a file; the CSV file sink is append-only, so only the non-aggregated sqlResult can be written
tableEnv.connect(new FileSystem().path("sensorOut2"))
.withFormat(new OldCsv())
.withSchema(new Schema()
.field("id", DataTypes.STRING())
.field("temp", DataTypes.DOUBLE()))
.createTemporaryTable("sensorOut2");
// tableEnv.from("sensorOut"); // would read the connector back as a source
// Write the query result into the registered output table
tableEnv.insertInto("sensorOut2", sqlResult);
//7. Execute the job
env.execute();
}
}
##### 7. Sink_Kafka: converting a stream to a table and writing it to Kafka
package com.atguigu.day06;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.Csv;
import org.apache.flink.table.descriptors.Kafka;
import org.apache.flink.table.descriptors.Schema;
import org.apache.kafka.clients.producer.ProducerConfig;
public class FlinkSQL07_Sink_Kafka {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Create a stream from a socket and map to JavaBeans
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream into a Table
Table table = tableEnv.fromDataStream(sensorDS);
//4. Table API
Table tableResult = table.select("id,temp").where("id='sensor_1'");
//5. SQL
tableEnv.createTemporaryView("socket", sensorDS);
Table sqlResult = tableEnv.sqlQuery("select id,temp from socket where id='sensor_1'");
//6. Write the data to Kafka
tableEnv.connect(new Kafka()
.version("0.11")
.topic("test")
.property(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop102:9092"))
.withFormat(new Csv())
.withSchema(new Schema()
.field("id", DataTypes.STRING())
.field("temp", DataTypes.DOUBLE()))
.createTemporaryTable("kafkaSink");
// tableEnv.insertInto("kafkaSink", tableResult);
sqlResult.insertInto("kafkaSink");
//7. Execute the job
env.execute();
}
}
##### 8. Sink_ES_Append: writing to Elasticsearch in append mode
package com.atguigu.day06;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.Elasticsearch;
import org.apache.flink.table.descriptors.Json;
import org.apache.flink.table.descriptors.Schema;
import org.apache.flink.types.Row;
public class FlinkSQL08_Sink_ES_Append {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Create a stream from a socket and map to JavaBeans
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream into a Table
Table table = tableEnv.fromDataStream(sensorDS);
//4. Table API
Table tableResult = table.select("id,temp").where("id='sensor_1'");
//5. SQL
tableEnv.createTemporaryView("socket", sensorDS);
Table sqlResult = tableEnv.sqlQuery("select id,temp from socket where id='sensor_1'");
//6. Write the data to Elasticsearch
tableEnv.connect(new Elasticsearch()
.version("6")
.host("hadoop102", 9200, "http")
.index("flink_sql")
.documentType("_doc"))
.inAppendMode()
.withFormat(new Json())
.withSchema(new Schema()
.field("id", DataTypes.STRING())
.field("temp", DataTypes.DOUBLE()))
.createTemporaryTable("EsPath");
tableEnv.insertInto("EsPath", tableResult);
tableEnv.toAppendStream(tableResult, Row.class).print();
//7. Execute the job
env.execute();
}
}
##### 9. Sink_ES_Upsert: writing to Elasticsearch in upsert mode
package com.atguigu.day06;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.*;
import org.apache.flink.types.Row;
import org.apache.kafka.clients.consumer.ConsumerConfig;
public class FlinkSQL09_Sink_ES_Upsert {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Create a stream from a socket and map to JavaBeans
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream into a Table
Table table = tableEnv.fromDataStream(sensorDS);
//4. Table API
Table tableResult = table.groupBy("id").select("id,id.count as ct");
tableEnv.createTemporaryView("socket", table);
Table sqlResult = tableEnv.sqlQuery("select id,ts,count(id) as ct from socket group by id,ts");
//5. Write the data to Elasticsearch; in upsert mode the key is derived from the group-by fields
tableEnv.connect(new Elasticsearch()
.version("6")
.host("hadoop102", 9200, "http")
.index("flink_sql04")
.disableFlushOnCheckpoint()
.bulkFlushMaxActions(1)
.documentType("_doc"))
.inUpsertMode()
.withFormat(new Json())
.withSchema(new Schema()
.field("id", DataTypes.STRING())
.field("ts", DataTypes.BIGINT())
.field("ct", DataTypes.BIGINT()))
.createTemporaryTable("EsPath");
tableEnv.insertInto("EsPath", sqlResult);
tableEnv.toRetractStream(sqlResult, Row.class).print();
//6. Execute the job
env.execute();
}
}
##### 10. Sink_MySQL: writing to MySQL
package com.atguigu.day06;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.StreamQueryConfig;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
public class FlinkSQL10_Sink_MySQL {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Create a stream from a socket and map to JavaBeans
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream into a Table
Table table = tableEnv.fromDataStream(sensorDS);
//4. Table API
Table tableResult = table.groupBy("id").select("id,id.count as ct");
//5. Write the data to MySQL through a DDL-registered JDBC table
String sinkDDL = "create table jdbcOutputTable (" +
" id varchar(20) not null, " +
" ct bigint not null " +
") with (" +
" 'connector.type' = 'jdbc', " +
" 'connector.url' = 'jdbc:mysql://hadoop102:3306/test', " +
" 'connector.table' = 'sensor_count1', " +
" 'connector.driver' = 'com.mysql.jdbc.Driver', " +
" 'connector.username' = 'root', " +
" 'connector.password' = '000000', " +
" 'connector.write.flush.max-rows' = '1')";
tableEnv.sqlUpdate(sinkDDL);
tableEnv.insertInto("jdbcOutputTable", tableResult);
//6. Execute the job
env.execute();
}
}
##### 11. ProcessTime_DataStream (processing-time attribute)
package com.atguigu.day06;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
public class FlinkSQL11_PorcessTime_DataStream {
public static void main(String[] args) {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Create a stream from a socket and map to JavaBeans
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream into a Table, appending a processing-time attribute pt
Table table = tableEnv.fromDataStream(sensorDS, "id,ts,temp,pt.proctime");
//4. Print the schema
table.printSchema();
}
}
##### 12. EventTime_DataStream (event-time attribute)
package com.atguigu.day06;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
public class FlinkSQL14_EventTime_DataStream {
public static void main(String[] args) {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
// Use event time
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Create a stream from a socket, assign timestamps and watermarks, then map to JavaBeans
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<String>(Time.seconds(1)) {
@Override
public long extractTimestamp(String element) {
String[] split = element.split(",");
return Long.parseLong(split[1]) * 1000L;
}
})
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream into a Table, declaring rt as the event-time attribute
Table table = tableEnv.fromDataStream(sensorDS, "id,ts,temp,rt.rowtime");
//4. Print the schema
table.printSchema();
}
}
### VII. Flink SQL Windows
##### 1. ProcessTime_Tumble: tumbling window on processing time
package com.atguigu.day07;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Slide;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.Tumble;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
public class FlinkSQL01_ProcessTime_Tumble {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Create a stream from a socket and map to JavaBeans
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream into a Table, appending a processing-time attribute pt
Table table = tableEnv.fromDataStream(sensorDS, "id,ts,temp,pt.proctime");
//4. Table API (commented-out alternatives)
// Table result = table.window(Tumble.over("5.seconds").on("pt").as("sw"))
// .groupBy("id,sw")
// .select("id,id.count");
// Table result = table.window(Tumble.over("5.rows").on("pt").as("sw"))
// .groupBy("id,sw")
// .select("id,id.count");
//SQL
tableEnv.createTemporaryView("sensor", table);
Table result = tableEnv.sqlQuery("select id,count(id) as ct,TUMBLE_end(pt,INTERVAL '5' second) from sensor " +
"group by id,TUMBLE(pt,INTERVAL '5' second)");
//5. Convert back to a stream and print
tableEnv.toAppendStream(result, Row.class).print();
//6. Execute
env.execute();
}
}
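In the SQL variant, TUMBLE(pt, INTERVAL '5' second) in the GROUP BY clause defines the window, while the auxiliary functions TUMBLE_START and TUMBLE_END return its bounds. As a sketch, selecting the window start instead of its end only swaps the auxiliary function:
Table result = tableEnv.sqlQuery("select id,count(id) as ct,TUMBLE_START(pt,INTERVAL '5' second) from sensor " +
"group by id,TUMBLE(pt,INTERVAL '5' second)");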
2. EventTime_Tumble (tumbling window, event time)
package com.atguigu.day07_test;
import com.atguigu.bean.SensorReading;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.Tumble;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
public class FlinkSQL02_EventTime_Tumble {
public static void main(String[] args) throws Exception {
//Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//Use event time
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
//Create the table environment
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//Read port data into a stream and assign timestamps/watermarks
SingleOutputStreamOperator<String> sensorDS = env.socketTextStream("hadoop102", 7777)
.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<String>(Time.seconds(1)) {
@Override
public long extractTimestamp(String element) {
String[] split = element.split(",");
return Long.parseLong(split[1]) * 1000L;
}
});
SingleOutputStreamOperator<SensorReading> lineDS = sensorDS.map(new MapFunction<String, SensorReading>() {
@Override
public SensorReading map(String line) throws Exception {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
}
});
//Convert the JavaBean stream to a table (rt is the event-time attribute)
Table table = tableEnv.fromDataStream(lineDS, "id,ts,temp,rt.rowtime");
//Table API
Table result = table.window(Tumble.over("5.seconds").on("rt").as("tw")).groupBy("id,tw").select("id,id.count,tw.end");
//Convert back to a stream and print
tableEnv.toAppendStream(result, Row.class).print();
//Execute
env.execute();
}
}
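A worked example of when this window fires, assuming the two lines below are typed in order: event-time tumbling windows are aligned to the epoch, so the first reading falls into [1547718195000, 1547718200000). The extractor tolerates 1 second of out-of-orderness, meaning the watermark is the largest timestamp seen minus 1 second:
sensor_1,1547718199,35.8
sensor_1,1547718201,15.4
The second line pushes the watermark to 1547718200000, which closes the first window and emits sensor_1 with a count of 1 and the window end 1547718200000.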
3. ProcessTime_Slide (sliding window, processing time)
package com.atguigu.day07;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Slide;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
public class FlinkSQL03_ProcessTime_Slide {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Read port data into a stream and convert it to a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream to a table
Table table = tableEnv.fromDataStream(sensorDS, "id,ts,temp,pt.proctime");
//4. Table API (time-based and count-based sliding windows)
// Table result = table.window(Slide.over("5.seconds").every("2.seconds").on("pt").as("sw"))
// .groupBy("id,sw")
// .select("id,id.count");
// Table result = table.window(Slide.over("5.rows").every("2.rows").on("pt").as("sw"))
// .groupBy("id,sw")
// .select("id,id.count");
//SQL
tableEnv.createTemporaryView("sensor", table);
Table result = tableEnv.sqlQuery("select id,count(id) as ct from sensor " +
"group by id,hop(pt,INTERVAL '2' second,INTERVAL '6' second)");
//5. Convert back to a stream and print
tableEnv.toAppendStream(result, Row.class).print();
//6. Execute
env.execute();
}
}
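Note the argument order of HOP in SQL: HOP(time_attr, slide, size), so the query above defines a 6-second window that slides every 2 seconds. The Table API reverses this and takes the window size first: Slide.over("6.seconds").every("2.seconds").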
4. EventTime_Slide (sliding window, event time)
package com.atguigu.day07;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.Slide;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
public class FlinkSQL04_EventTime_Slide {
public static void main(String[] args) throws Exception {
//1. Create the execution environment and use event time
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Read port data into a stream, assign timestamps/watermarks, and convert to a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<String>(Time.seconds(1)) {
@Override
public long extractTimestamp(String element) {
String[] split = element.split(",");
return Long.parseLong(split[1]) * 1000L;
}
})
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream to a table (rt is the event-time attribute)
Table table = tableEnv.fromDataStream(sensorDS, "id,ts,temp,rt.rowtime");
//4. Table API
// Table result = table.window(Slide.over("6.seconds").every("2.seconds").on("rt").as("sw"))
// .groupBy("id,sw")
// .select("id,id.count");
//SQL
tableEnv.createTemporaryView("sensor", table);
Table result = tableEnv.sqlQuery("select id,count(id) as ct from sensor " +
"group by id,hop(rt,INTERVAL '2' second,INTERVAL '6' second)");
//5. Convert back to a stream and print
tableEnv.toAppendStream(result, Row.class).print();
//6. Execute
env.execute();
}
}
5. ProcessTime_Session (session window, processing time)
package com.atguigu.day07;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Session;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
public class FlinkSQL05_ProcessTime_Session {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Read port data into a stream and convert it to a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream to a table
Table table = tableEnv.fromDataStream(sensorDS, "id,ts,temp,pt.proctime");
//4. Session window (Table API)
// Table result = table.window(Session.withGap("5.seconds").on("pt").as("sw"))
// .groupBy("id,sw")
// .select("id,id.count");
//SQL
tableEnv.createTemporaryView("sensor", table);
Table result = tableEnv.sqlQuery("select id,count(id) as ct from sensor " +
"group by id,session(pt,INTERVAL '5' second)");
//5. Convert back to a stream and print
tableEnv.toAppendStream(result, Row.class).print();
//6. Execute
env.execute();
}
}
6. EventTime_Session (session window, event time)
package com.atguigu.day07;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.Session;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
public class FlinkSQL06_EventTime_Session {
public static void main(String[] args) throws Exception {
//1. Create the execution environment and use event time
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Read port data into a stream, assign timestamps/watermarks, and convert to a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<String>(Time.seconds(1)) {
@Override
public long extractTimestamp(String element) {
String[] split = element.split(",");
return Long.parseLong(split[1]) * 1000L;
}
})
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream to a table (rt is the event-time attribute)
Table table = tableEnv.fromDataStream(sensorDS, "id,ts,temp,rt.rowtime");
//4. Session window (Table API)
// Table result = table.window(Session.withGap("5.seconds").on("rt").as("sw"))
// .groupBy("id,sw")
// .select("id,id.count");
//SQL
tableEnv.createTemporaryView("sensor", table);
Table result = tableEnv.sqlQuery("select id,count(id) as ct from sensor " +
"group by id,session(rt,INTERVAL '5' second)");
//5. Convert back to a stream and print
tableEnv.toAppendStream(result, Row.class).print();
//6. Execute
env.execute();
}
}
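As a quick example of the 5-second session gap: readings for one id at ts 1547718199 and 1547718201 are 2 seconds apart and fall into the same session, while a reading at ts 1547718210 arrives 9 seconds later and opens a new session. The first session then spans [1547718199000, 1547718201000 + 5000), and it is emitted once the watermark passes that end time.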
7. ProcessTime_Over (over window, processing time)
package com.atguigu.day07;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Over;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
public class FlinkSQL07_ProcessTime_Over {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Read port data into a stream and convert it to a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream to a table
Table table = tableEnv.fromDataStream(sensorDS, "id,ts,temp,pt.proctime");
//4. Over windows (Table API: unbounded and row-bounded)
// Table result = table.window(Over.partitionBy("id").orderBy("pt").as("ow"))
// .select("id,id.count over ow");
// Table result = table.window(Over.partitionBy("id").orderBy("pt").preceding("3.rows").as("ow"))
// .select("id,id.count over ow");
//SQL
tableEnv.createTemporaryView("sensor", table);
Table result = tableEnv.sqlQuery("select id,count(id) " +
"over(partition by id order by pt) as ct " +
"from sensor");
//5. Convert back to a stream and print
tableEnv.toRetractStream(result, Row.class).print();
//6. Execute
env.execute();
}
}
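Unlike the group windows above, an over window emits one result row per input row. Feeding three sensor_1 lines therefore prints a running count per key, conceptually:
(sensor_1,1)
(sensor_1,2)
(sensor_1,3)
An unbounded over aggregation is append-only (nothing is ever retracted), so toAppendStream would also work here; toRetractStream is simply the more general choice.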
8. EventTime_Over (over window, event time)
package com.atguigu.day07;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.Over;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
public class FlinkSQL08_EventTime_Over {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Read port data into a stream, assign timestamps/watermarks, and convert to a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<String>(Time.seconds(1)) {
@Override
public long extractTimestamp(String element) {
String[] split = element.split(",");
return Long.parseLong(split[1]) * 1000L;
}
})
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream to a table (rt is the event-time attribute)
Table table = tableEnv.fromDataStream(sensorDS, "id,ts,temp,rt.rowtime");
//4. Over windows (Table API: unbounded and row-bounded)
// Table result = table.window(Over.partitionBy("id").orderBy("rt").as("ow"))
// .select("id,id.count over ow");
Table result = table.window(Over.partitionBy("id").orderBy("rt").preceding("3.rows").as("ow"))
.select("id,id.count over ow");
//5. Convert back to a stream and print
tableEnv.toRetractStream(result, Row.class).print();
//6. Execute
env.execute();
}
}
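preceding("3.rows") bounds each aggregation to the current row plus the three rows before it, per id and ordered by rt. A rough SQL equivalent of the active Table API query, assuming the table has been registered as a temporary view named sensor as in the processing-time example:
Table result = tableEnv.sqlQuery("select id,count(id) " +
"over(partition by id order by rt rows between 3 preceding and current row) as ct " +
"from sensor");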
9. Custom scalar UDF (one value in, one value out): Function_ScalarFunc
package com.atguigu.day07;
import com.atguigu.bean.SensorReading;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.functions.ScalarFunction;
import org.apache.flink.types.Row;
public class FlinkSQL09_Function_ScalarFunc {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Read port data into a stream and convert it to a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream to a table
Table table = tableEnv.fromDataStream(sensorDS);
//4. Register the function
tableEnv.registerFunction("MyLength", new MyLength());
//5.TableAPI
Table tableResult = table.select("id,id.MyLength");
//6.SQL
tableEnv.createTemporaryView("sensor", table);
Table sqlResult = tableEnv.sqlQuery("select id,MyLength(id) from sensor");
//7. Convert back to streams and print
tableEnv.toAppendStream(tableResult, Row.class).print("tableResult");
tableEnv.toAppendStream(sqlResult, Row.class).print("sqlResult");
//8. Execute
env.execute();
}
//Scalar UDF: eval returns the length of the input string
public static class MyLength extends ScalarFunction {
public int eval(String value) {
return value.length();
}
}
}
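For example, MyLength maps the id sensor_1 to 8, so an input line sensor_1,1547718199,35.8 produces sensor_1,8 from both the Table API and the SQL variant.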
10. UDTF (table function, explodes one column into multiple rows): Function_TableFunc
package com.atguigu.day07;
import com.atguigu.bean.SensorReading;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.functions.TableFunction;
import org.apache.flink.types.Row;
public class FlinkSQL10_Function_TableFunc {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Read port data into a stream and convert it to a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream to a table
Table table = tableEnv.fromDataStream(sensorDS);
//4. Register the function
tableEnv.registerFunction("Split", new Split());
//5.TableAPI
Table tableResult = table
.joinLateral("Split(id) as (word,length)")
.select("id,word,length");
//6.SQL
tableEnv.createTemporaryView("sensor", table);
Table sqlResult = tableEnv.sqlQuery("select id,word,length from sensor," +
"LATERAL TABLE(Split(id)) as T(word, length)");
//7. Convert back to streams and print
tableEnv.toAppendStream(tableResult, Row.class).print("tableResult");
tableEnv.toAppendStream(sqlResult, Row.class).print("sqlResult");
//8. Execute
env.execute();
}
//Table UDF: splits the input on "_" and emits one (word, length) row per token
public static class Split extends TableFunction<Tuple2<String, Integer>> {
public void eval(String value) {
String[] split = value.split("_");
for (String s : split) {
collector.collect(new Tuple2<>(s, s.length()));
}
}
}
}
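For example, the id sensor_1 is split on "_" into the rows (sensor,6) and (1,1); after the lateral join each output row keeps the original id, so both variants print sensor_1,sensor,6 and sensor_1,1,1.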
11. Aggregate function (UDAF, many rows in, one value out): Function_AggFunc
package com.atguigu.day07;
import com.atguigu.bean.SensorReading;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.functions.AggregateFunction;
import org.apache.flink.types.Row;
public class FlinkSQL11_Function_AggFunc {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Read port data into a stream and convert it to a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream to a table
Table table = tableEnv.fromDataStream(sensorDS);
//4. Register the function
tableEnv.registerFunction("TempAvg", new TempAvg());
//5.TableAPI
Table tableResult = table
.groupBy("id")
.select("id,temp.TempAvg");
//6.SQL
tableEnv.createTemporaryView("sensor", table);
Table sqlResult = tableEnv.sqlQuery("select id,TempAvg(temp) from sensor group by id");
//7. Convert back to retract streams and print
tableEnv.toRetractStream(tableResult, Row.class).print("tableResult");
tableEnv.toRetractStream(sqlResult, Row.class).print("sqlResult");
//8. Execute
env.execute();
}
public static class TempAvg extends AggregateFunction<Double, Tuple2<Double, Integer>> {
//Initialize the accumulator (sum, count)
@Override
public Tuple2<Double, Integer> createAccumulator() {
return new Tuple2<>(0.0D, 0);
}
//Accumulate each input value
public void accumulate(Tuple2<Double, Integer> buffer, Double value) {
buffer.f0 += value;
buffer.f1 += 1;
}
//Compute the final result from the accumulator
@Override
public Double getValue(Tuple2<Double, Integer> accumulator) {
return accumulator.f0 / accumulator.f1;
}
}
}
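For example, two readings for sensor_1 with temperatures 35.8 and 15.4 leave the accumulator at (51.2, 2), so getValue returns 25.6. Because the running average is re-emitted (and the previous value retracted) on every new reading, the result must be converted with toRetractStream rather than toAppendStream.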
12. Table aggregate function (many rows in, many rows out, Top-N): Function_TableAggFunc
package com.atguigu.day07;
import com.atguigu.bean.SensorReading;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.functions.TableAggregateFunction;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;
public class FlinkSQL12_Function_TableAggFunc {
public static void main(String[] args) throws Exception {
//1. Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2. Read port data into a stream and convert it to a JavaBean
SingleOutputStreamOperator<SensorReading> sensorDS = env.socketTextStream("hadoop102", 7777)
.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
//3. Convert the stream to a table
Table table = tableEnv.fromDataStream(sensorDS);
//4. Register the function
tableEnv.registerFunction("Top2Temp", new Top2Temp());
//5.TableAPI
Table tableResult = table
.groupBy("id")
.flatAggregate("Top2Temp(temp) as (temp,rank)")
.select("id,temp,rank");
//6. Convert back to a retract stream and print
tableEnv.toRetractStream(tableResult, Row.class).print("tableResult");
//7. Execute
env.execute();
}
//Table aggregate UDF: tracks the two highest temperatures; the accumulator holds (highest, second-highest)
public static class Top2Temp extends TableAggregateFunction<Tuple2<Double, Integer>, Tuple2<Double, Double>> {
//Initialize the accumulator
@Override
public Tuple2<Double, Double> createAccumulator() {
return new Tuple2<>(Double.MIN_VALUE, Double.MIN_VALUE);
}
public void accumulate(Tuple2<Double, Double> buffer, Double value) {
//1. Compare the input with the current maximum
if (value > buffer.f0) {
buffer.f1 = buffer.f0;
buffer.f0 = value;
} else if (value > buffer.f1) {
//2. Otherwise compare it with the current second-highest
buffer.f1 = value;
}
}
//Emit the current top two values with their ranks
public void emitValue(Tuple2<Double, Double> buffer, Collector<Tuple2<Double, Integer>> collector) {
collector.collect(new Tuple2<>(buffer.f0, 1));
if (buffer.f1 != Double.MIN_VALUE) {
collector.collect(new Tuple2<>(buffer.f1, 2));
}
}
}
}
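For example, temperatures 35.8, 15.4 and 38.1 for one id leave the accumulator at (38.1, 35.8), and emitValue emits (38.1, 1) and (35.8, 2). The current top two are re-emitted, with the previous result retracted, after every input row, which is why the result is converted with toRetractStream.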