文章目录
flink基础理论
https://blog.csdn.net/oTengYue/article/details/102689538
导入依赖
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.11</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.11_2.11</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-statebackend-rocksdb_2.11</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.11</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_2.11</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.38</version>
</dependency>
<dependency>
<groupId>org.apache.bahir</groupId>
<artifactId>flink-connector-redis_2.11</artifactId>
<version>1.0</version>
</dependency>
<!--scala-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_2.11</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>1.10.1</version>
</dependency>
</dependencies>
wordcount
SocketTextStream 可以从Socket中读取字符串数据
public class WordCount {
public static void main(String[] args) throws Exception {
//1定义env执行环境
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(3);
//2source
DataStreamSource<String> inputDataStream = env.socketTextStream("192.168.232.211", 7777);
//3转换
SingleOutputStreamOperator<Tuple2<String, Integer>> result = inputDataStream
.flatMap(new MyFlatMapper())
.keyBy(0)
.sum(1);
//4sink
result.print("wordcount");
env.execute("java word count");
}
//自定义类 实现FlatMapFunction接口
public static class MyFlatMapper implements org.apache.flink.api.common.functions.FlatMapFunction<String, Tuple2<String,Integer>> {
@Override
public void flatMap(String s, Collector<Tuple2<String,Integer>> collector) throws Exception {
String[] words = s.split("\\s+");
for (String word : words) {
collector.collect(new Tuple2<String, Integer>(word,1));
}
}
}
}
从文件中读取数据
public class WordCount2 {
public static void main(String[] args) throws Exception {
//1定义env执行环境
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
//2source
String filePath="D:\\kb11\\flinkstu\\resource\\words.txt";
DataSource<String> stringDataSource = env.readTextFile(filePath);
//3转换
AggregateOperator<Tuple2<String, Integer>> result = stringDataSource.flatMap(new WordCount.MyFlatMapper())
.groupBy(0)
.sum(1);
//4sink
result.print();
}
private static class MyFlatMapper implements org.apache.flink.api.common.functions.FlatMapFunction<String, Tuple2<String,Integer>> {
@Override
public void flatMap(String s, Collector<Tuple2<String,Integer>> collector) throws Exception {
String[] words = s.split("\\s+");
for (String word : words) {
collector.collect(new Tuple2<String, Integer>(word,1));
}
}
}
}
source
collection
1、创建SensorReading类
public class SensorReading {
private String id;
private Long timestamp;
private Double temperature;
public SensorReading() {
}
public SensorReading(String id, Long timestamp, Double temperature) {
this.id = id;
this.timestamp = timestamp;
this.temperature = temperature;
}
@Override
public String toString() {
return "SensorReading{" +
"id='" + id + '\'' +
", timestamp=" + timestamp +
", temperature=" + temperature +
'}';
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public Long getTimestamp() {
return timestamp;
}
public void setTimestamp(Long timestamp) {
this.timestamp = timestamp;
}
public Double getTemperature() {
return temperature;
}
public void setTemperature(Double temperature) {
this.temperature = temperature;
}
}
public class Source1_Collection {
public static void main(String[] args) throws Exception {
//1创建flink环境
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
List<SensorReading> list = new ArrayList<>();
list.add(new SensorReading("sensor_1",1624853498L,37.5));
list.add(new SensorReading("sensor_2",1624853500L,36.5));
list.add(new SensorReading("sensor_3",1624853501L,37.1));
list.add(new SensorReading("sensor_5",1624853502L,37.2));
list.add(new SensorReading("sensor_8",1624853503L,37.3));
//2source
DataStreamSource<SensorReading> dataStreamSource = env.fromCollection(list);
//3sink
dataStreamSource.print("sensor");
env.execute("collectionSource");
}
}
file
public class Source2_File {
public static void main(String[] args) throws Exception {
//1创建执行环境
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//2source
// DataStreamSource<String> stringDataStreamSource = env.socketTextStream("192.168.232.211", 7777);
DataStreamSource<String> stringDataStreamSource = env.readTextFile("D:\\kb11\\flinkstu\\resource\\words.txt");
stringDataStreamSource.print("filesource");
env.execute("filesource");
}
}
kafka
public class Kafka {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
Properties prop = new Properties();
prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"192.168.232.211:9092");
prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"sensor_group1");
prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,"org.apache.kafka.common.serizlization.StringDeserializer");
prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,"org.apache.kafka.common.serizlization.StringDeserializer");
prop.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"latest");
DataStreamSource<String> sensor = env.addSource(new FlinkKafkaConsumer011<String>
("sensor", new SimpleStringSchema(), prop));
sensor.print();
env.execute("kafkademo");
}
}
自定义数据源
public class Source4 {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStreamSource<SensorReading> dataStream = env.addSource(new MySensorSource());
dataStream.print("mySource");
env.execute("mySourceDemo");
}
private static class MySensorSource implements org.apache.flink.streaming.api.functions.source.SourceFunction<SensorReading> {
boolean flag = true;
@Override
public void run(SourceContext<SensorReading> sourceContext) throws Exception {
while (flag){
sourceContext.collect(
new SensorReading(
"sensor_"+new Random().nextInt(10),
System.currentTimeMillis(),
new Random().nextInt(9)+30.0
)
);
Thread.sleep(1000);
}
}
@Override
public void cancel() {
flag = false;
}
}
}
sink
kafka
public class Sink1_Kafka {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
Properties prop = new Properties();
prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"192.168.232.211:9092");
prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"sensor_group2");
prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,"org.apache.kafka.common.serizlization.StringDeserializer");
prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,"org.apache.kafka.common.serizlization.StringDeserializer");
prop.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"latest");
DataStreamSource<String> dataStream = env.addSource(new FlinkKafkaConsumer011<String>
("sensor", new SimpleStringSchema(), prop));
SingleOutputStreamOperator<String> resultDataStream = dataStream.map(line -> {
String[] split = line.split(",");
String sensorReadingStr = new SensorReading(split[0], Long.parseLong(split[1]), Double.parseDouble(split[2])).toString();
return sensorReadingStr;
});
resultDataStream.addSink(new FlinkKafkaProducer011<String>(
"192.168.232.211:9092",
"sensorout",
new SimpleStringSchema()
));
env.execute("kafkademo1");
}
}
mysql
public class Sink2_Mysql {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
Properties prop = new Properties();
prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"192.168.232.211:9092");
prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"sensor_group3");
prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,"org.apache.kafka.common.serizlization.StringDeserializer");
prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,"org.apache.kafka.common.serizlization.StringDeserializer");
prop.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"latest");
DataStreamSource<String> dataStream = env.addSource(new FlinkKafkaConsumer011<String>
("sensor", new SimpleStringSchema(), prop));
SingleOutputStreamOperator<SensorReading> resultDataStream = dataStream.map(line -> {
String[] split = line.split(",");
SensorReading sensorReading = new SensorReading(split[0], Long.parseLong(split[1]), Double.parseDouble(split[2]));
return sensorReading;
});
resultDataStream.addSink(new MyJdbcSink());
env.execute("mysqlSinkDemo1");
}
private static class MyJdbcSink extends RichSinkFunction<SensorReading> {
Connection connection = null;
PreparedStatement insertstmt = null;
PreparedStatement updatestmt =null;
@Override
public void open(Configuration parameters) throws Exception {
//System.out.println("open");
connection = DriverManager.getConnection(
"jdbc:mysql://192.168.232.211:3306/flinkdemo?useSSL=false",
"root",
"ok"
);
//预编译
insertstmt = connection.prepareStatement("insert into sensor_temp(id,tem) values(?,?)");
updatestmt = connection.prepareStatement("update sensor_temp set tem=? where id=?");
}
@Override
public void close() throws Exception {
connection.close();
}
@Override
public void invoke(SensorReading value, Context context) throws Exception {
updatestmt.setDouble(1,value.getTemperature());
updatestmt.setString(2,value.getId());
updatestmt.execute();
if (updatestmt.getUpdateCount()==0) {
insertstmt.setString(1,value.getId());
insertstmt.setDouble(2,value.getTemperature());
insertstmt.execute();
}
}
}
}
redis
public class Sink3_Redis {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
DataStreamSource<String> inputStream = env.readTextFile("D:\\kb11\\flinkstu\\resource\\sensor.txt");
SingleOutputStreamOperator<SensorReading> dataStream = inputStream.map(line -> {
String[] split = line.split(",");
return new SensorReading(split[0], Long.parseLong(split[1]), Double.parseDouble(split[2]));
});
FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder().setHost("192.168.232.211")
.setPort(6379)
.setDatabase(2)
.build();
dataStream.addSink(new RedisSink<>(config, new RedisMapper<SensorReading>() {
@Override
public RedisCommandDescription getCommandDescription() {
return new RedisCommandDescription(RedisCommand.HSET,"sensor_temp");
}
@Override
public String getKeyFromData(SensorReading sensorReading) {
return sensorReading.getId();
}
@Override
public String getValueFromData(SensorReading sensorReading) {
return sensorReading.getTemperature().toString();
}
}));
env.execute("redisdemo1");
}
}