Preface
Flink is getting more and more popular these days. I've been reading the Flink docs recently, and last Friday a friend asked me to write a Flink job that reads from Kafka, plus one that reads from MySQL. Unfortunately I don't have a test cluster, so the samples are written but unverified; please try them out on your own cluster. Versions used:
- flink-1.6.2
- kafka-0.10
Maven pom.xml
<!-- Hadoop -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>${hadoop.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>${hadoop.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>${hadoop.version}</version>
</dependency>
<!-- Flink -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.11</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.11</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka-0.10_2.11</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table_2.11</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-jdbc</artifactId>
    <version>${flink.version}</version>
</dependency>
<!-- HBase libraries -->
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>${hbase.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-common</artifactId>
    <version>${hbase.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>${hbase.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-protocol</artifactId>
    <version>${hbase.version}</version>
</dependency>
<!-- JDBC (MySQL) -->
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.40</version>
</dependency>
You will probably need some or all of the dependency jars above.
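The snippets above reference version properties that aren't shown. Here is a minimal sketch of the matching properties block; the Flink version comes from this post, while the Hadoop and HBase versions are my assumptions, so align them with your cluster:

<properties>
    <!-- flink.version is from this post; the other versions are assumptions -->
    <flink.version>1.6.2</flink.version>
    <hadoop.version>2.7.3</hadoop.version>
    <hbase.version>1.2.6</hbase.version>
</properties>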
Flink and Kafka
First, let's look at the Kafka-Flink integration example given on the official site:
Kafka010Example
(official example link)
package com.learn.Flink.kafka

import org.apache.flink.api.common.restartstrategy.RestartStrategies
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer010, FlinkKafkaProducer010}
import org.apache.flink.api.scala._

/**
  * @Author: king
  * @Datetime: 2018/10/16
  * @Desc: Kafka 0.10 integration example from the official Flink docs.
  */
object Kafka010Example {

  def main(args: Array[String]): Unit = {
    // Parse the input arguments
    val params = ParameterTool.fromArgs(args)
    if (params.getNumberOfParameters < 4) {
      println("Missing parameters!\n"
        + "Usage: Kafka --input-topic <topic> --output-topic <topic> "
        + "--bootstrap.servers <kafka brokers> "
        + "--zookeeper.connect <zk quorum> --group.id <some id> [--prefix <prefix>]")
      return
    }
    val prefix = params.get("prefix", "PREFIX:")

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging
    env.getConfig.setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000))
    // Take a checkpoint every 5 seconds
    env.enableCheckpointing(5000)
    // Make the parameters available in the web interface
    env.getConfig.setGlobalJobParameters(params)

    // Create a Kafka consumer as the stream source, for Kafka 0.10.x
    val kafkaConsumer = new FlinkKafkaConsumer010(
      params.getRequired("input-topic"),
      new SimpleStringSchema,
      params.getProperties)

    // Alternative: consume the Kafka data with an inline consumer
    /*val transaction = env
      .addSource(
        new FlinkKafkaConsumer010[String](
          params.getRequired("input-topic"),
          new SimpleStringSchema,
          params.getProperties))
    transaction.print()*/

    // Consume the Kafka data and prepend the prefix
    val messageStream = env
      .addSource(kafkaConsumer)
      .map(in => prefix + in)
    messageStream.print()

    // Create a producer for Kafka 0.10.x
    val kafkaProducer = new FlinkKafkaProducer010(
      params.getRequired("output-topic"),
      new SimpleStringSchema,
      params.getProperties)

    // Write the data back to Kafka
    messageStream.addSink(kafkaProducer)

    env.execute("Kafka 0.10 Example")
  }
}
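To try the official example, you pass the connection settings on the command line when submitting, along the lines of `flink run -c com.learn.Flink.kafka.Kafka010Example your-job.jar --input-topic test-in --output-topic test-out --bootstrap.servers master:9092 --zookeeper.connect master:2181 --group.id flink-demo`. The jar name, hosts, and topic names there are placeholders of mine, not from the docs.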
Then I wrote my own Flink job that consumes the data in Kafka:
ReadingFromKafka
package com.learn.Flink.kafka

import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.api.scala._

/**
  * @Author: king
  * @Datetime: 2018/11/26
  * @Desc: Reads messages from Kafka and prints them.
  */
object ReadingFromKafka {

  private val ZOOKEEPER_HOST = "master:2181,worker1:2181,worker2:2181"
  private val KAFKA_BROKER = "master:9092,worker1:9092,worker2:9092"
  private val TRANSACTION_GROUP = "transaction"

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.enableCheckpointing(1000)
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)

    // Configure the Kafka consumer
    val kafkaProps = new Properties()
    kafkaProps.setProperty("zookeeper.connect", ZOOKEEPER_HOST)
    kafkaProps.setProperty("bootstrap.servers", KAFKA_BROKER)
    kafkaProps.setProperty("group.id", TRANSACTION_GROUP)

    // The topic name is "new"; the default SimpleStringSchema() is sufficient
    val transaction = env
      .addSource(
        new FlinkKafkaConsumer010[String]("new", new SimpleStringSchema, kafkaProps)
      )

    transaction.print()

    env.execute()
  }
}
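One thing to watch: the job sets TimeCharacteristic.EventTime but never assigns timestamps or watermarks. That is harmless for a plain print(), but any event-time windows downstream would never fire. Here is a minimal sketch of attaching watermarks, assuming (hypothetically) that each message starts with an epoch-millisecond timestamp followed by a comma; it would go inside main after transaction is defined, with the imports at the top of the file:

// extra imports at the top of the file
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.windowing.time.Time

// Tolerate up to 10 seconds of out-of-order events
val withTimestamps = transaction.assignTimestampsAndWatermarks(
  new BoundedOutOfOrdernessTimestampExtractor[String](Time.seconds(10)) {
    // Assumption: messages look like "<epoch-millis>,<payload>"
    override def extractTimestamp(element: String): Long =
      element.split(",")(0).toLong
  })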
Having consumed the data, it ought to be written somewhere, so let's write it into HBase (verify this yourself; again, I have no test cluster to run the code against).
package com.learn.Flink.kafka

import java.text.SimpleDateFormat
import java.util.{Date, Properties}

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.flink.api.scala._

/**
  * @Author: king
  * @Datetime: 2018/11/23
  * @Desc: Consumes messages from Kafka and writes them into HBase.
  */
object Flink2hbase {

  val ZOOKEEPER_URL = "hostname1:port,hostname2:port,hostname3:port"
  val KAFKA_URL = "hostname1:port,hostname2:port,hostname3:port"
  val columnFamily = "info"
  val tableName = TableName.valueOf("Flink2HBase")

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Enable checkpointing (important)
    env.enableCheckpointing(1000)
    // Use event time
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val props = new Properties()
    props.setProperty("zookeeper.connect", ZOOKEEPER_URL)
    props.setProperty("bootstrap.servers", KAFKA_URL)
    props.setProperty("group.id", "flink-kafka")

    val transaction = env.addSource(
      new FlinkKafkaConsumer010[String]("test", new SimpleStringSchema, props))

    // Write each record into HBase as a side effect and pass it through
    transaction.rebalance.map { value =>
      print(value)
      writeIntoHBase(value)
      value
    }

    env.execute()
  }

  def writeIntoHBase(m: String): Unit = {
    val hbaseconf = HBaseConfiguration.create
    hbaseconf.set("hbase.zookeeper.quorum", ZOOKEEPER_URL)
    hbaseconf.set("hbase.defaults.for.version.skip", "true")

    val connection = ConnectionFactory.createConnection(hbaseconf)
    val admin = connection.getAdmin
    if (!admin.tableExists(tableName)) {
      admin.createTable(new HTableDescriptor(tableName).addFamily(new HColumnDescriptor(columnFamily)))
    }
    val table = connection.getTable(tableName)

    // Use the current timestamp as the row key
    val df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    val put = new Put(Bytes.toBytes(df.format(new Date())))
    put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("test"), Bytes.toBytes(m))
    table.put(put)
    table.close()
    connection.close()
  }
}
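One caveat with the code above: writeIntoHBase opens and closes an HBase connection for every single record, which will not hold up under any real load. A more idiomatic approach is to move the write into a RichSinkFunction so the connection is created once per parallel instance. Here is a sketch of that; the class and field names are my own, and it is untested for the same reason as the rest of this post:

import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Put, Table}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}

// Hypothetical sink: one HBase connection per parallel sink instance
class HBaseSink(zkQuorum: String, tableName: String, columnFamily: String)
    extends RichSinkFunction[String] {

  @transient private var connection: Connection = _
  @transient private var table: Table = _

  override def open(parameters: Configuration): Unit = {
    // Open the connection once, when the sink instance starts
    val conf = HBaseConfiguration.create
    conf.set("hbase.zookeeper.quorum", zkQuorum)
    connection = ConnectionFactory.createConnection(conf)
    table = connection.getTable(TableName.valueOf(tableName))
  }

  override def invoke(value: String): Unit = {
    // Timestamp row key, mirroring the original example
    val put = new Put(Bytes.toBytes(System.currentTimeMillis().toString))
    put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes("test"), Bytes.toBytes(value))
    table.put(put)
  }

  override def close(): Unit = {
    if (table != null) table.close()
    if (connection != null) connection.close()
  }
}

In main you would then replace the map with transaction.addSink(new HBaseSink(ZOOKEEPER_URL, "Flink2HBase", columnFamily)).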
Flink and JDBC
package com.learn.Flink.demo

import java.sql.Types

import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.api.java.io.jdbc.{JDBCInputFormat, JDBCOutputFormat}
import org.apache.flink.api.java.typeutils.RowTypeInfo
import org.apache.flink.api.scala._
import org.apache.flink.types.Row

/**
  * @Author: king
  * @Datetime: 2018/11/23
  * @Desc: Reads from and writes to MySQL through flink-jdbc.
  */
object JDBCSource {

  val driverClass = "com.mysql.jdbc.Driver"
  val dbUrl = "jdbc:mysql://172.17.17.89:3306/test"
  val userName = "usr_test"
  val passWord = "usr_test"

  def main(args: Array[String]): Unit = {
    // Execution environment
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Prepare a couple of rows to insert
    val row1 = new Row(3)
    row1.setField(0, 1)
    row1.setField(1, "shabi")
    row1.setField(2, 20)
    val row2 = new Row(3)
    row2.setField(0, 2)
    row2.setField(1, "doubi")
    row2.setField(2, 22)
    val rows: Array[Row] = Array(row1, row2)

    // Insert the rows
    //insertRows(rows)

    // Read back all rows
    selectAllFields(env)

    // Update one row
    val row22 = new Row(3)
    row22.setField(0, 2)
    row22.setField(1, "")
    row22.setField(2, 25)
    //updateRow(row22)
  }

  /**
    * Insert rows
    */
  def insertRows(rows: Array[Row]): Unit = {
    // Prepare the output format
    val jdbcOutputFormat = JDBCOutputFormat.buildJDBCOutputFormat()
      .setDrivername(driverClass)
      .setDBUrl(dbUrl)
      .setUsername(userName)
      .setPassword(passWord)
      .setQuery("insert into flink_test values(?,?,?)")
      // Must match the column types of the table
      .setSqlTypes(Array[Int](Types.INTEGER, Types.VARCHAR, Types.INTEGER))
      .finish()

    // Connect to the target database and initialize the PreparedStatement
    jdbcOutputFormat.open(0, 1)

    // Add the records to the PreparedStatement; the jdbcOutputFormat must be open.
    // This may fail if the column types were not specified.
    for (row <- rows) {
      jdbcOutputFormat.writeRecord(row)
    }

    // Execute the PreparedStatement and release all of this instance's resources
    jdbcOutputFormat.close()
  }

  /*
  /**
    * Update a row (the docs give no update example, so I'm not sure
    * this is actually how updates are done)
    *
    * @param row the updated row
    */
  def updateRow(row: Row): Unit = {
    // Prepare the output format
    val jdbcOutputFormat = JDBCOutputFormat.buildJDBCOutputFormat()
      .setDrivername(driverClass)
      .setDBUrl(dbUrl)
      .setUsername(userName)
      .setPassword(passWord)
      .setQuery("update flink_test set name = ?, age = ? where id = ?")
      // Must match the field types of rowComb below
      .setSqlTypes(Array[Int](Types.VARCHAR, Types.INTEGER, Types.INTEGER))
      .finish()

    // Connect to the target database and initialize the PreparedStatement
    jdbcOutputFormat.open(0, 1)

    // Assemble the fields in the order of the SQL placeholders; the number and
    // types of fields in rowComb must match the question marks in the query
    val rowComb = new Row(3)
    rowComb.setField(0, row.getField(1).asInstanceOf[String])
    rowComb.setField(1, row.getField(2).asInstanceOf[Int])
    rowComb.setField(2, row.getField(0).asInstanceOf[Int])

    // Add the record to the PreparedStatement; the jdbcOutputFormat must be open.
    // This may fail if the column types were not specified.
    jdbcOutputFormat.writeRecord(rowComb)

    // Execute the PreparedStatement and release all of this instance's resources
    jdbcOutputFormat.close()
  }
  */

  /**
    * Select all fields
    */
  def selectAllFields(env: ExecutionEnvironment) = {
    val inputBuilder = JDBCInputFormat.buildJDBCInputFormat()
      .setDrivername(driverClass)
      .setDBUrl(dbUrl)
      .setUsername(userName)
      .setPassword(passWord)
      .setQuery("select * from flink_test")
      // If the first field's type doesn't match the column, this throws a
      // type-conversion exception at runtime
      .setRowTypeInfo(new RowTypeInfo(
        BasicTypeInfo.INT_TYPE_INFO,
        BasicTypeInfo.STRING_TYPE_INFO,
        BasicTypeInfo.INT_TYPE_INFO))

    val source = env.createInput(inputBuilder.finish)
    source.print()
  }
}
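Working with raw Row objects gets clumsy quickly. Here is a small sketch of mapping the JDBC source into a case class; the case class and the (id, name, age) column layout are my assumptions about the flink_test table, and the helper would live inside JDBCSource, with the case class defined at the top level:

// Assumed shape of flink_test: (id INT, name VARCHAR, age INT)
case class FlinkTest(id: Int, name: String, age: Int)

def selectAsCaseClass(env: ExecutionEnvironment): Unit = {
  val inputFormat = JDBCInputFormat.buildJDBCInputFormat()
    .setDrivername(driverClass)
    .setDBUrl(dbUrl)
    .setUsername(userName)
    .setPassword(passWord)
    .setQuery("select * from flink_test")
    .setRowTypeInfo(new RowTypeInfo(
      BasicTypeInfo.INT_TYPE_INFO,
      BasicTypeInfo.STRING_TYPE_INFO,
      BasicTypeInfo.INT_TYPE_INFO))
    .finish()

  // Convert each Row into the typed case class
  val typed = env.createInput(inputFormat)
    .map(r => FlinkTest(
      r.getField(0).asInstanceOf[Int],
      r.getField(1).asInstanceOf[String],
      r.getField(2).asInstanceOf[Int]))

  // Now fields can be referenced by name
  typed.filter(_.age > 21).print()
}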