Flink 没有类似于 Spark 中 foreach 方法那样让用户进行迭代操作的机制。所有对外的输出操作都要利用 Sink 完成,最后通过类似如下方式完成整个任务的最终输出操作。
stream.addSink(new MySink(xxxx))
官方提供了一部分框架的 sink。除此以外,需要用户自定义实现 sink。
![](https://i-blog.csdnimg.cn/blog_migrate/20911b2344435a4cd8d0fd993628f607.png)
![](https://i-blog.csdnimg.cn/blog_migrate/9c0586aae90e0679e9022ee1d15f6d95.png)
5.0 File
package com.zhen.flink.api.sink
import com.zhen.flink.api.SensorReading
import org.apache.flink.api.common.serialization.SimpleStringEncoder
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.scala._
/**
* @Author FengZhen
* @Date 6/8/22 10:43 PM
* @Description TODO
*/
object FileSink {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Source: read raw sensor records from a local text file.
    val sourcePath = "/Users/FengZhen/Desktop/accumulate/0_project/flink_learn/src/main/resources/data/sensor.txt"
    val rawStream = env.readTextFile(sourcePath)

    // Parse each CSV line ("id,timestamp,temperature") into a SensorReading.
    val sensorStream: DataStream[SensorReading] = rawStream.map { line =>
      val fields = line.split(",")
      SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
    }

    sensorStream.print()

    // Simple CSV file sink (NOTE(review): writeAsCsv is a legacy API — confirm
    // whether StreamingFileSink below should replace it entirely).
    val csvOutPath = "/Users/FengZhen/Desktop/accumulate/0_project/flink_learn/src/main/resources/data/sensor_out.txt"
    sensorStream.writeAsCsv(csvOutPath)

    // Row-format streaming file sink: each element is encoded via toString.
    val rowSinkPath = "/Users/FengZhen/Desktop/accumulate/0_project/flink_learn/src/main/resources/data/sensor_out_1.txt"
    val rowFormatSink = StreamingFileSink
      .forRowFormat(new Path(rowSinkPath), new SimpleStringEncoder[SensorReading]())
      .build()
    sensorStream.addSink(rowFormatSink)

    env.execute("file sink.")
  }
}
5.1 Kafka
package com.zhen.flink.api.sink
import java.util.Properties
import com.zhen.flink.api.SensorReading
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer, FlinkKafkaProducer}
/**
* @Author FengZhen
* @Date 6/11/22 3:20 PM
* @Description TODO
*/
object KafkaSink {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // NOTE(review): this file source is created but never consumed below —
    // the job actually reads from Kafka; kept for parity with the original.
    val filePath = "/Users/FengZhen/Desktop/accumulate/0_project/flink_learn/src/main/resources/data/sensor.txt"
    val inputStream = env.readTextFile(filePath)

    // Consumer configuration for the Kafka source.
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "localhost:9092")
    properties.setProperty("group.id", "consumer-group")
    properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("auto.offset.reset", "latest")

    // Source: consume raw CSV strings from the "topic_sensor" topic.
    val kafkaConsumer = new FlinkKafkaConsumer[String](
      "topic_sensor",
      new SimpleStringSchema(),
      properties
    )
    val streamKafka = env.addSource(kafkaConsumer)

    // Parse each record into a SensorReading, then render it back to a String
    // so the producer below can serialize it with SimpleStringSchema.
    val dataStream: DataStream[String] = streamKafka.map { line =>
      val fields = line.split(",")
      SensorReading(fields(0), fields(1).toLong, fields(2).toDouble).toString
    }

    // Sink: publish the transformed records to "topic_flink_kafka_sink".
    dataStream.addSink(
      new FlinkKafkaProducer[String]("localhost:9092", "topic_flink_kafka_sink", new SimpleStringSchema())
    )

    // Manual smoke test:
    //   ./bin/kafka-console-producer.sh --broker-list localhost:9092 --topic topic_sensor
    //   ./bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic topic_flink_kafka_sink
    env.execute("kafka sink.")
  }
}
5.2 Redis
package com.zhen.flink.api.sink
import com.zhen.flink.api.SensorReading
import org.apache.flink.api.common.serialization.SimpleStringEncoder
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.redis.RedisSink
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig
import org.apache.flink.streaming.connectors.redis.common.mapper.{RedisCommand, RedisCommandDescription, RedisMapper}
/**
* @Author FengZhen
* @Date 6/12/22 8:23 PM
* @Description TODO
*/
object RedisSink {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Source: read raw sensor lines from a local file.
    val sourcePath = "/Users/FengZhen/Desktop/accumulate/0_project/flink_learn/src/main/resources/data/sensor.txt"
    val rawStream = env.readTextFile(sourcePath)

    // Parse each CSV line into a SensorReading.
    val dataStream: DataStream[SensorReading] = rawStream.map { line =>
      val fields = line.split(",")
      SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
    }

    // Jedis connection-pool settings for the Redis sink (FlinkJedisConfigBase).
    val jedisConfig = new FlinkJedisPoolConfig.Builder()
      .setHost("localhost")
      .setPort(6379)
      .setDatabase(1)
      .build()

    dataStream.addSink(new RedisSink[SensorReading](jedisConfig, new MyRedisMapper))

    env.execute("redis sink.")
  }

  /**
   * Maps a SensorReading onto the Redis command that persists it:
   * HSET sensor_temp &lt;id&gt; &lt;temperature&gt;.
   */
  class MyRedisMapper extends RedisMapper[SensorReading] {

    // Write every element into the hash named "sensor_temp".
    override def getCommandDescription: RedisCommandDescription =
      new RedisCommandDescription(RedisCommand.HSET, "sensor_temp")

    // Hash field: the sensor id.
    override def getKeyFromData(t: SensorReading): String = t.id

    // Hash value: the temperature rendered as a string.
    override def getValueFromData(t: SensorReading): String = t.temperature.toString
  }
}
5.3 Elasticsearch
package com.zhen.flink.api.sink
import java.util
import com.zhen.flink.api.SensorReading
import org.apache.flink.api.common.functions.RuntimeContext
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.elasticsearch.{ElasticsearchSinkBase, ElasticsearchSinkFunction, RequestIndexer}
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink
import org.apache.http.HttpHost
import org.elasticsearch.client.Requests
/**
* @Author FengZhen
* @Date 6/17/22 3:39 PM
* @Description TODO
*/
object ElasticsearchSinkTest {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Source: read raw sensor lines from a local file.
    val sourcePath = "/Users/FengZhen/Desktop/accumulate/0_project/flink_learn/src/main/resources/data/sensor.txt"
    val rawStream = env.readTextFile(sourcePath)

    // Parse each CSV line into a SensorReading.
    val dataStream: DataStream[SensorReading] = rawStream.map { line =>
      val fields = line.split(",")
      SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
    }

    // Cluster address(es) the sink should talk to.
    val httpHosts = new util.ArrayList[HttpHost]()
    httpHosts.add(new HttpHost("localhost", 9200))

    // Turns each SensorReading into an index request against index "sensor",
    // mapping type "reading_data".
    val esSinkFunction = new ElasticsearchSinkFunction[SensorReading] {
      override def process(element: SensorReading, ctx: RuntimeContext, indexer: RequestIndexer): Unit = {
        // Document payload as a flat string map.
        val document = new util.HashMap[String, String]()
        document.put("id", element.id)
        document.put("temperature", element.temperature.toString)
        document.put("ts", element.timestamp.toString)

        // Build the HTTP index request and hand it to the indexer.
        val request = Requests.indexRequest()
          .index("sensor")
          .`type`("reading_data")
          .source(document)
        indexer.add(request)
      }
    }

    dataStream.addSink(
      new ElasticsearchSink.Builder[SensorReading](httpHosts, esSinkFunction).build()
    )

    env.execute("elasticsearch sink.")
  }
}
5.4 JDBC自定义sink
package com.zhen.flink.api.sink
import java.sql.{Connection, DriverManager, PreparedStatement}
import com.zhen.flink.api.SensorReading
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.scala._
/**
* @Author FengZhen
* @Date 7/1/22 2:21 PM
* @Description TODO
*/
object JdbcSink {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // 0. Read raw sensor lines from a local file.
    val filePath = "/Users/FengZhen/Desktop/accumulate/0_project/flink_learn/src/main/resources/data/sensor.txt"
    val inputStream = env.readTextFile(filePath)

    // 1. Parse each CSV line into a SensorReading.
    val dataStream: DataStream[SensorReading] = inputStream
      .map(
        data => {
          val arr = data.split(",")
          SensorReading(arr(0), arr(1).toLong, arr(2).toDouble)
        }
      )

    dataStream.addSink(new MyJdbcSinkFunc())

    env.execute("jdbc sink")
  }

  /**
   * Upsert-style JDBC sink: tries an UPDATE first and falls back to an
   * INSERT when no row with the given sensor id exists yet.
   */
  class MyJdbcSinkFunc() extends RichSinkFunction[SensorReading] {

    // Connection and prepared statements; created in open(), released in close().
    var conn: Connection = _
    var insertStmt: PreparedStatement = _
    var updateStmt: PreparedStatement = _

    override def open(parameters: Configuration): Unit = {
      conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", "root", "1234qwer")
      insertStmt = conn.prepareStatement("insert into sensor_temp (id, temp) values (?,?)")
      updateStmt = conn.prepareStatement("update sensor_temp set temp = ? where id = ?")
    }

    override def invoke(value: SensorReading, context: SinkFunction.Context): Unit = {
      // Try the UPDATE first; it succeeds when a row for this id already exists.
      updateStmt.setDouble(1, value.temperature)
      updateStmt.setString(2, value.id)
      updateStmt.execute()
      // No row was touched by the UPDATE -> the id is new, INSERT it.
      if (updateStmt.getUpdateCount == 0) {
        insertStmt.setString(1, value.id)
        insertStmt.setDouble(2, value.temperature)
        insertStmt.execute()
      }
    }

    override def close(): Unit = {
      // Null-guard every handle: open() may have failed partway through
      // (e.g. connection refused), and close() is still called during task
      // cleanup — it must not throw an NPE. The nested try/finally also
      // guarantees the connection is released even if a statement close fails.
      try {
        if (insertStmt != null) insertStmt.close()
      } finally {
        try {
          if (updateStmt != null) updateStmt.close()
        } finally {
          if (conn != null) conn.close()
        }
      }
    }
  }
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Build file for the flink_learn examples: Flink streaming (Scala API)
     plus Kafka / Redis / Elasticsearch / JDBC sink connectors. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhen.flink</groupId>
<artifactId>flink_learn</artifactId>
<version>1.0-SNAPSHOT</version>
<name>flink_learn Maven</name>
<properties>
<scala_version>2.12</scala_version>
<flink_version>1.13.1</flink_version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala_version}</artifactId>
<version>${flink_version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_${scala_version}</artifactId>
<version>${flink_version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_${scala_version}</artifactId>
<version>${flink_version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_${scala_version}</artifactId>
<version>${flink_version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.bahir/flink-connector-redis -->
<!-- NOTE(review): this artifact is built for Scala 2.11 while the project
     uses ${scala_version}=2.12 — verify binary compatibility or find a
     2.12 build of the Bahir Redis connector. -->
<dependency>
<groupId>org.apache.bahir</groupId>
<artifactId>flink-connector-redis_2.11</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-elasticsearch6_${scala_version}</artifactId>
<version>${flink_version}</version>
</dependency>
<!-- NOTE(review): 5.1.x is the legacy MySQL JDBC driver line; confirm it
     matches the target MySQL server version. -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.44</version>
</dependency>
</dependencies>
<build>
<plugins> <!-- 该插件用于将 Scala 代码编译成 class 文件 -->
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.4.6</version>
<executions>
<execution> <!-- 声明绑定到 maven 的 compile 阶段 -->
<goals>
<goal>compile</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Builds a fat jar (jar-with-dependencies) at the package phase. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.0.0</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>