FileSink
First, let's write the data out to a file.
import org.apache.flink.streaming.api.scala._
object FileSink {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
//Set the parallelism to 1 here; otherwise output files will be created for multiple partitions
env.setParallelism(1)
val flieDS = env.readTextFile("data\\students.txt")
val clazzDS = flieDS.map(_.split(",")(4))
.map((_, 1))
.keyBy(_._1)
.reduce((x, y) => (x._1, x._2 + y._2))
clazzDS.writeAsCsv("data\\out.txt")
clazzDS.print()
env.execute()
}
}
However, writeAsCsv and the other write* methods are now marked as deprecated,
so we will learn the more involved approach: writing to files through the addSink method.
import org.apache.flink.api.common.serialization.SimpleStringEncoder
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.scala._
object FileSink {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val flieDS = env.readTextFile("data\\students.txt")
val stuDS: DataStream[Student] = flieDS.map(line => {
val stuarr: Array[String] = line.split(",")
Student(stuarr(0), stuarr(1), stuarr(2).toInt, stuarr(3), stuarr(4))
})
//writeAsCsv is deprecated
// clazzDS.writeAsCsv("data\\out.txt")
//forRowFormat: plain files written row by row (a rolling-policy variant is sketched after this block)
stuDS.addSink(StreamingFileSink.forRowFormat(
new Path("data\\out.txt"),
new SimpleStringEncoder[Student]()
).build()
)
flieDS.print()
env.execute()
}
}
case class Student(id:String,name:String,age:Int,gender:String,clazz:String)
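By default the row-format sink uses a built-in rolling policy to decide when part files are closed. If you want explicit control, you can attach one yourself. Below is a minimal sketch, assuming Flink 1.11.x and the same Student stream; the interval and size values are arbitrary examples, not recommendations.
import java.util.concurrent.TimeUnit
import org.apache.flink.api.common.serialization.SimpleStringEncoder
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy

//Roll to a new part file every 15 minutes, after 5 minutes without new data, or once a part reaches 128 MB
val rollingSink: StreamingFileSink[Student] = StreamingFileSink
  .forRowFormat(new Path("data\\out.txt"), new SimpleStringEncoder[Student]())
  .withRollingPolicy(
    DefaultRollingPolicy.builder()
      .withRolloverInterval(TimeUnit.MINUTES.toMillis(15))
      .withInactivityInterval(TimeUnit.MINUTES.toMillis(5))
      .withMaxPartSize(128L * 1024 * 1024)
      .build[Student, String]()
  )
  .build()

//stuDS.addSink(rollingSink)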
Output:
KafkaSink
Add the dependency:
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.11</artifactId>
    <version>1.11.2</version>
</dependency>
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer
object KafkaSink {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val flieDS = env.readTextFile("data\\students.txt")
val stuDS: DataStream[String] = flieDS.map(line => {
val stuarr: Array[String] = line.split(",")
Student(stuarr(0), stuarr(1), stuarr(2).toInt, stuarr(3), stuarr(4)).toString
})
//If we kept the case class, serialization would be cumbersome, so it is easier to call toString and send plain Strings (keeping the case class is sketched after this block)
stuDS.addSink(new FlinkKafkaProducer[String]("doker:9092",
"test_topic1",
new SimpleStringSchema()
))
env.execute()
}
}
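If you would rather keep the case class instead of calling toString, one option is to implement a KafkaSerializationSchema yourself and use the Properties-based FlinkKafkaProducer constructor. This is only a sketch, assuming stuDS stays a DataStream[Student]; the key/value layout below is illustrative, not part of the original example.
import java.nio.charset.StandardCharsets
import java.util.Properties
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaProducer, KafkaSerializationSchema}
import org.apache.kafka.clients.producer.ProducerRecord

//Turn each Student into a Kafka record: the id becomes the key, a CSV line becomes the value
class StudentKafkaSchema(topic: String) extends KafkaSerializationSchema[Student] {
  override def serialize(stu: Student, timestamp: java.lang.Long): ProducerRecord[Array[Byte], Array[Byte]] = {
    val value = s"${stu.id},${stu.name},${stu.age},${stu.gender},${stu.clazz}"
    new ProducerRecord(topic,
      stu.id.getBytes(StandardCharsets.UTF_8),
      value.getBytes(StandardCharsets.UTF_8))
  }
}

val props = new Properties()
props.setProperty("bootstrap.servers", "doker:9092")

//AT_LEAST_ONCE is the default delivery guarantee; EXACTLY_ONCE would require Kafka transactions
val producer = new FlinkKafkaProducer[Student](
  "test_topic1",
  new StudentKafkaSchema("test_topic1"),
  props,
  FlinkKafkaProducer.Semantic.AT_LEAST_ONCE)

//stuDS.addSink(producer)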
First, start ZooKeeper and Kafka:
kafka-server-start.sh -daemon /usr/local/soft/kafka_2.11-1.0.0/config/server.properties
Next, create the topic (I am running a pseudo-distributed cluster):
kafka-topics.sh --create --zookeeper doker:2181 --replication-factor 1 --partitions 3 --topic test_topic1
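You can confirm that the topic was created by listing the topics:
kafka-topics.sh --list --zookeeper doker:2181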
Start a console consumer to watch the topic:
kafka-console-consumer.sh --bootstrap-server doker:9092 --topic test_topic1
Then run the Flink job.
RedisSink
Add the dependency:
<dependency>
    <groupId>org.apache.bahir</groupId>
    <artifactId>flink-connector-redis_2.11</artifactId>
    <version>1.0</version>
</dependency>
FlinkJedisConfigBase: supplies the Jedis connection configuration
RedisMapper: defines the data written to Redis and the command used to write it
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.redis.RedisSink
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig
import org.apache.flink.streaming.connectors.redis.common.mapper.{RedisCommand, RedisCommandDescription, RedisMapper}
object RedisSinkTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val flieDS = env.readTextFile("data\\students.txt")
val stuDS: DataStream[Student] = flieDS.map(line => {
val stuarr: Array[String] = line.split(",")
Student(stuarr(0), stuarr(1), stuarr(2).toInt, stuarr(3), stuarr(4))
})
//Define the FlinkJedisConfigBase (the Jedis connection config)
val conf= new FlinkJedisPoolConfig.Builder()
.setHost("doker")
.setPort(6379)
.build()
//Pass in the config (the Redis connection) and the mapper (the Redis command and the data to write)
stuDS.addSink(new RedisSink[Student](conf,new MyRedisMapper))
env.execute()
}
}
class MyRedisMapper extends RedisMapper[Student]{
//Define the command used to write to Redis: HSET <hash name> <key> <value>
override def getCommandDescription: RedisCommandDescription = {
new RedisCommandDescription(RedisCommand.HSET,"student_message")
}
//Map a Student record to the hash field (key) and value that get written
override def getKeyFromData(data: Student): String = data.id
override def getValueFromData(data: Student): String = data.name
}
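Because the mapper writes with HSET into the hash student_message, you can check the result directly in redis-cli:
redis-cli -h doker
HGETALL student_message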
Successfully written to Redis:
JdbcSink
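The example below needs the MySQL JDBC driver on the classpath in addition to Flink itself. Add the dependency (the version here is only an assumption; pick one that matches your MySQL server, and note that the com.mysql.jdbc.Driver class name used below belongs to the 5.x driver):
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.49</version>
</dependency>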
import java.sql.{Connection, DriverManager}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.apache.flink.streaming.api.scala._
object MysqlSinkTest{
def main(args: Array[String]): Unit = {
val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
val linesDS = env.readTextFile("data/students.txt")
linesDS.addSink(new JdbcSink())
env.execute()
}
}
class JdbcSink extends RichSinkFunction[String]{
var conn:Connection=_
override def open(parameters: Configuration): Unit = {
Class.forName("com.mysql.jdbc.Driver")
conn = DriverManager.getConnection(
"jdbc:mysql://doker:3306/stu?useUnicode=true&characterEncoding=utf-8",
"root",
"123456")
}
override def invoke(value: String, context: SinkFunction.Context[_]): Unit = {
val splits = value.split(",")
val id = splits(0)
val name = splits(1)
val age = splits(2).toInt
val gender = splits(3)
val clazz = splits(4)
val ps = conn.prepareStatement("insert into student(id,name,age,gender,clazz) values (?,?,?,?,?) ")
ps.setString(1,id)
ps.setString(2,name)
ps.setInt(3,age)
ps.setString(4,gender)
ps.setString(5,clazz)
ps.execute()
//Close the statement so we do not leak a PreparedStatement per record
ps.close()
}
override def close(): Unit = {
conn.close()
}
}
Write succeeded:
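Note that the sink above prepares (and closes) a new PreparedStatement for every record. A slightly more efficient variant, sketched below with the same connection settings and table, prepares the statement once in open() and reuses it in invoke():
import java.sql.{Connection, DriverManager, PreparedStatement}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}

class JdbcSinkReuse extends RichSinkFunction[String] {
  var conn: Connection = _
  var ps: PreparedStatement = _

  override def open(parameters: Configuration): Unit = {
    Class.forName("com.mysql.jdbc.Driver")
    conn = DriverManager.getConnection(
      "jdbc:mysql://doker:3306/stu?useUnicode=true&characterEncoding=utf-8",
      "root",
      "123456")
    //Prepare the statement once; only the parameter values change per record
    ps = conn.prepareStatement("insert into student(id,name,age,gender,clazz) values (?,?,?,?,?)")
  }

  override def invoke(value: String, context: SinkFunction.Context[_]): Unit = {
    val splits = value.split(",")
    ps.setString(1, splits(0))
    ps.setString(2, splits(1))
    ps.setInt(3, splits(2).toInt)
    ps.setString(4, splits(3))
    ps.setString(5, splits(4))
    ps.execute()
  }

  override def close(): Unit = {
    if (ps != null) ps.close()
    if (conn != null) conn.close()
  }
}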