Flink Sink
- Flink has no equivalent of Spark's foreach method that lets users run arbitrary per-element output logic. All writes to external systems therefore go through a Sink, and the final output step of a job is wired up roughly like this: stream.addSink(new MySink(...))
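Before the concrete connectors below, a minimal custom sink is just a SinkFunction whose invoke is called once per record. A sketch (the class name and output prefix are made up for illustration):

import org.apache.flink.streaming.api.functions.sink.SinkFunction

// Minimal custom sink sketch: invoke runs once per record.
// MyPrintSink is a hypothetical name used only for illustration.
class MyPrintSink extends SinkFunction[String] {
  override def invoke(value: String): Unit = {
    println(s"my-sink> $value")
  }
}

// Usage: stream.addSink(new MyPrintSink)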
1. Kafka
- pom.xml
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
    <version>1.7.2</version>
</dependency>
- Main code
import com.guli.source.SensorReading
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011

object KafkaSink {
  def main(args: Array[String]): Unit = {
    // 1. Create the execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // 2. Set the parallelism
    env.setParallelism(1)
    // 3. Source & Transform
    val input: DataStream[String] = env.readTextFile("/Users/zgl/Documents/IdeaProjects/FlinkTutorial/src/sensors.txt")
      .map(data => {
        val params = data.split(",")
        SensorReading(params(0).trim, params(1).trim.toLong, params(2).trim.toDouble).toString
      })
    // 4. Sink
    input.addSink(new FlinkKafkaProducer011[String]("hadoop101:9092", "test", new SimpleStringSchema()))
    input.print()
    // 5. Execute the job
    env.execute("testKafkaSink...")
  }
}
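FlinkKafkaProducer011 also has constructors that take explicit producer Properties and a delivery Semantic. A sketch of a drop-in replacement for step 4 above, assuming the same broker and topic (AT_LEAST_ONCE shown; EXACTLY_ONCE additionally requires Flink checkpointing to be enabled):

import java.util.Properties
import org.apache.flink.streaming.util.serialization.KeyedSerializationSchemaWrapper

// Explicit producer config plus a delivery guarantee, same broker/topic as above.
val props = new Properties()
props.setProperty("bootstrap.servers", "hadoop101:9092")

input.addSink(new FlinkKafkaProducer011[String](
  "test",                                                                // target topic
  new KeyedSerializationSchemaWrapper[String](new SimpleStringSchema()), // value-only serialization
  props,
  FlinkKafkaProducer011.Semantic.AT_LEAST_ONCE                           // delivery guarantee
))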
2. Redis
- pom.xml
<dependency>
    <groupId>org.apache.bahir</groupId>
    <artifactId>flink-connector-redis_2.11</artifactId>
    <version>1.0</version>
</dependency>
- Main code:
import com.guli.source.SensorReading
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.redis.RedisSink
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig
import org.apache.flink.streaming.connectors.redis.common.mapper.{RedisCommand, RedisCommandDescription, RedisMapper}

object TestRedisSink {
  def main(args: Array[String]): Unit = {
    // 1. Create the execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // 2. Set the parallelism
    env.setParallelism(1)
    // 3. Source & Transform
    val input: DataStream[SensorReading] = env.readTextFile("/Users/zgl/Documents/IdeaProjects/FlinkTutorial/src/sensors.txt")
      .map(data => {
        val params = data.split(",")
        SensorReading(params(0).trim, params(1).trim.toLong, params(2).trim.toDouble)
      })
    // 4. Sink
    val conf: FlinkJedisPoolConfig = new FlinkJedisPoolConfig.Builder().setHost("localhost").setPort(6379).build()
    input.addSink(new RedisSink[SensorReading](conf, new MyRedisMapper))
    input.print()
    // 5. Execute the job
    env.execute("testRedisSink...")
  }
}

class MyRedisMapper extends RedisMapper[SensorReading] {
  // Define the Redis command to run: HSET against the hash named "test"
  override def getCommandDescription: RedisCommandDescription = {
    new RedisCommandDescription(RedisCommand.HSET, "test")
  }

  // Field value: the temperature reading
  override def getValueFromData(t: SensorReading): String = t.temperature.toString

  // Field key: the sensor id
  override def getKeyFromData(t: SensorReading): String = t.id
}
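Because the mapper uses RedisCommand.HSET with the additional key "test", every record becomes a field in the single Redis hash named test, effectively HSET test <id> <temperature>. Each new reading for a sensor overwrites the previous field, so a quick hgetall test in redis-cli shows the latest temperature per sensor id.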
3. JDBC custom sink
- pom.xml
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.44</version>
</dependency>
- Main code:
import java.sql.{Connection, DriverManager, PreparedStatement}

import com.guli.source.SensorReading
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.apache.flink.streaming.api.scala._

object TestJDBCSink {
  def main(args: Array[String]): Unit = {
    // 1. Create the execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // 2. Set the parallelism
    env.setParallelism(1)
    // 3. Source & Transform
    val input: DataStream[SensorReading] = env.readTextFile("/Users/zgl/Documents/IdeaProjects/FlinkTutorial/src/sensors.txt")
      .map(data => {
        val params = data.split(",")
        SensorReading(params(0).trim, params(1).trim.toLong, params(2).trim.toDouble)
      })
    // 4. Sink
    input.addSink(new JDBCSink)
    input.print()
    // 5. Execute the job
    env.execute("testJDBCSink...")
  }
}

class JDBCSink extends RichSinkFunction[SensorReading] {
  var conn: Connection = _
  var insertStmt: PreparedStatement = _
  var updateStmt: PreparedStatement = _

  // Open the connection and prepare the statements once per parallel task
  override def open(parameters: Configuration): Unit = {
    super.open(parameters)
    conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", "root", "mysql")
    insertStmt = conn.prepareStatement("insert into sensor values (?, ?)")
    updateStmt = conn.prepareStatement("update sensor set temp = ? where id = ?")
  }

  // Upsert: try an update first; if no row was touched, insert a new one
  override def invoke(value: SensorReading, context: SinkFunction.Context[_]): Unit = {
    updateStmt.setDouble(1, value.temperature)
    updateStmt.setString(2, value.id)
    updateStmt.execute()
    if (updateStmt.getUpdateCount == 0) {
      insertStmt.setString(1, value.id)
      insertStmt.setDouble(2, value.temperature)
      insertStmt.execute()
    }
  }

  // Release JDBC resources when the task shuts down
  override def close(): Unit = {
    insertStmt.close()
    updateStmt.close()
    conn.close()
  }
}
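The sink assumes a sensor table already exists in the test database. A one-off setup sketch, with a schema inferred from the insert and update statements above (column names and types are an assumption; adjust to your data):

import java.sql.DriverManager

// One-off table setup, assuming the same MySQL instance and credentials as the sink.
// The schema is an assumption inferred from "insert into sensor values (?, ?)" above.
object CreateSensorTable {
  def main(args: Array[String]): Unit = {
    val conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", "root", "mysql")
    val stmt = conn.createStatement()
    stmt.execute("create table if not exists sensor (id varchar(32) primary key, temp double)")
    stmt.close()
    conn.close()
  }
}

The update-then-insert pattern in invoke keeps one row per sensor id (latest temperature) rather than appending a row per reading.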