分为两个部分主要是从mysql的数据库中取出数据和将数据插入到mysql数据库中。
从MySQL数据库中提取数据并与Kafka数据合并
1 创建一个类继承RichMapFunction[IN,OUT] (这里的in就是进来的数据类型,Out是处理完的数据类型,这里最好创建一个样例类,用来包装最后想要的数据)
class MySqlFunction extends RichMapFunction[String, Order_Event]
2 定义mysql的连接并将想要查询的数据放入到一个HashMap中
//定义SQL的连接、预编译器,给定初始值占位符
private var connection: Connection = _
// 用户信息缓存
private val cache = new util.HashMap[String, (String, String, Double)]()
override def open(parameters: Configuration): Unit = {
// 定义mysql数据库连接url和驱动程序及账号、密码
val url = "jdbc:mysql://192.168.45.13:3306/ds_db01?useSSL=false"
val driver = "com.mysql.jdbc.Driver"
val username = "root"
val password = "123456"
// 加载Mysql JDBC驱动
Class.forName(driver)
connection = DriverManager.getConnection(url, username, password)
// 执行SQL语句
val sql = "select customer_id,customer_name,mobile_phone,customer_money from customer_inf"
// 执行预编译 SQL 语句
val stmt = connection.prepareStatement(sql)
// 执行查询
val rs = stmt.executeQuery()
// 把数据放入到cache中
while (rs.next()) {
val id = rs.getString("customer_id")
val consignee = rs.getString("customer_name")
val consignee_tel = rs.getString("mobile_phone")
val final_total_amount = rs.getDouble("customer_money")
cache.put(id, (consignee, consignee_tel, final_total_amount))
}
connection.close()
}
3 将kafka数据流的数据和mysql中查询到的数据包装成最后想要输出的样例类
override def map(in: Order_Master_Data): Order_Event = {
val data = in.getData
val feight_fee = data.shipping_money
val id = data.customer_id.toString
Order_Event(id, cache.get(id)._1, cache.get(id)._2, cache.get(id)._3, feight_fee)
}
}
将数据存入MySQL数据库
1 创建一个类继承RichSinkFunction[IN] (这里放的是进入的数据类型)
class MyJDBCSinkProcessFunction extends RichSinkFunction[Order_Event]
2 定义mysql的数据连接以及定义预编译sql
private var connection: Connection = _
private var stmt: PreparedStatement = _
override def open(parameters: Configuration): Unit = {
val url = "jdbc:mysql://192.168.45.13:3306/ds_db01?useSSL=false"
val driver = "com.mysql.jdbc.Driver"
val username = "root"
val password = "123456"
Class.forName(driver)
connection = DriverManager.getConnection(url, username, password)
val sql = "insert into order_info(id,consignee,consignee_tel,final_total_amount,feight_fee) values(?,?,?,?,?)"
stmt = connection.prepareStatement(sql)
}
3 设置预编译语句中的数据,最后执行update SQL
override def invoke(value: Order_Event, context: SinkFunction.Context): Unit = {
stmt.setString(1, value.id)
stmt.setString(2, value.consignee)
stmt.setString(3, value.consignee_tel)
stmt.setDouble(4, value.final_total_amount)
stmt.setDouble(5, value.feight_fee)
// 执行update SQL
stmt.executeUpdate()
}
4 关闭连接
override def close(): Unit = {
stmt.close()
connection.close()
}
我们这里举个题目做个例子
采用双流JOIN的方式(本系统稳定,无需担心数据迟到与丢失的问题,建议使用滚动窗口),结合订单信息和订单详细信息,拼接成如下表所示格式,其中包含订单id、订单总金额、商品数,将数据存入ClickHouse数据库shtd_result的orderpostiveaggr表中(表结构如下),然后在Linux的ClickHouse命令行中根据sn降序排序,查询出前5条。
完整的代码:
object Flink9 {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val properties = new Properties()
properties.setProperty("bootstrap.servers", "192.168.45.13:9092,192.168.45.14:9092,192.168.45.15:9092")
properties.setProperty("group.id", "pt-test9")
val kafkaConsumer = new FlinkKafkaConsumer[String]("order", new SimpleStringSchema(), properties)
val order_Stream = env.addSource(kafkaConsumer)
.filter(_.length > 150)
.map(data => {
val str = data.replaceAll("NULL", "0")
val gson = new Gson()
gson.fromJson(str, classOf[Order_Detail_Data])
})
.filter(data => {
val table_name = data.getTable
table_name.equals("order_detail")
})
.keyBy(r => 1)
.process(new KeyedProcessFunction[Int, Order_Detail_Data, util.HashMap[String, (Double, Long)]] {
lazy val totalMap: MapState[String, (Double, Long)] = getRuntimeContext.getMapState(new MapStateDescriptor[String, (Double, Long)]("totalMap", classOf[String], classOf[(Double, Long)]))
override def processElement(i: Order_Detail_Data, context: KeyedProcessFunction[Int, Order_Detail_Data, util.HashMap[String, (Double, Long)]]#Context, collector: Collector[util.HashMap[String, (Double, Long)]]): Unit = {
val map = new util.HashMap[String, (Double, Long)]()
totalMap.keys().forEach(data => map.put(data, (totalMap.get(data)._1, totalMap.get(data)._2)))
val data = i.getData
val order_sn = data.order_sn
if (map.containsKey(order_sn)) {
map.put(order_sn, (map.get(order_sn)._1 + data.product_price, map.get(order_sn)._2 + data.product_cnt))
} else {
map.put(order_sn, (data.product_price, data.product_cnt))
}
collector.collect(map)
}
})
order_Stream.addSink(new RichSinkFunction[util.HashMap[String, (Double, Long)]] {
//定义SQL的连接、预编译器,给定初始值占位符
var connection: Connection = _
var stmt: PreparedStatement = _
override def open(parameters: Configuration): Unit = {
// 定义连接url和驱动程序及账号、密码
val url = "jdbc:clickhouse://192.168.45.13/shtd_result?useSSL=false"
val driver = "ru.yandex.clickhouse.ClickHouseDriver"
val username = "default"
val password = "123456"
Class.forName(driver)
connection = DriverManager.getConnection(url, username, password)
val sql = "insert into orderpostiveaggr(sn,orderprice,orderdetailcount) values(?,?,?)"
stmt = connection.prepareStatement(sql)
}
override def invoke(value: util.HashMap[String, (Double, Long)], context: SinkFunction.Context): Unit = {
value.forEach { case (key, (double, long)) => {
stmt.setString(1, key)
stmt.setDouble(2, double)
stmt.setLong(3, long)
}
stmt.executeUpdate()
}
}
})
env.execute()
}
}