业务场景:
SDK 数据统一写入 Kafka 的同一个 topic(topic_sdk_log),其中包含多种事件类型的数据,如登录、注册、激活等,需要将 Kafka 中的数据根据事件类型分别写入 MySQL 的多个表。这里使用 Flink 按时间窗口(下文代码中窗口大小为 3 秒)聚合后,批量写入 MySQL 的不同表。
数据示例:
{"key":"login","data":{"_game_version":"","_package_id":"12280200","_core_account":"ysdk_oGFjT0XEjdefIOgd7uApwWUX2ccY","_time":"2020-03-23 15:35:11","_time_server":"2020-03-23 15:35:59","_idfa":"","_imei":"","_mac":null,"_imsi":null,"_device_code":"46e0fd235df0b7c167c0cbe82be02a3c"}}
{"key":"activate","data":{"_game_version":"","_package_id":850030,"_time":"2020-03-23 15:25:56","_time_server":"2020-03-23 15:27:42","_idfa":"","_imei":"866092032174377","_mac":"02:00:00:00:00:00","_imsi":"460001960611910","_device_code":"fc9b4de27db42c94433a02c59cd5e2ff"}}
KafkaToMySQL 主类
import com.alibaba.fastjson.JSON
import com.sm.conf.ConfigurationManager
import com.sm.constants.Constants
import com.sm.sink.MySQLSink
import com.sm.entry.SdkData
import com.sm.util.KafkaUtils
import com.sm.window.SdkWindow
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.TimeCharacteristic
/**
* Flink 读取 Kafka,按时间窗口(当前窗口大小为 3 秒)聚合数据后,批量写入 MySQL
*
* create by LiuJinHe 2020/3/25
*/
object KafkaToMySQL {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
// 失败重启
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(10, 3L))
// env.setParallelism(1)
// checkpoint 周期
env.enableCheckpointing(10000)
val config = env.getCheckpointConfig
// exactly_one 模式
config.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)
// checkpoint 之间最小间隔
config.setMinPauseBetweenCheckpoints(1000)
// checkpoint 超时时间,超时将被丢弃
config.setCheckpointTimeout(10000)
// 同一时间只允许进行一次 checkpoint
config.setMaxConcurrentCheckpoints(1)
val kafkaProp = ConfigurationManager.load(Constants.KAFKA_PROPERTIES)
val kafkaConsumer = KafkaUtils.getConsumer(kafkaProp).setStartFromGroupOffsets()
val dataStream = env.addSource(kafkaConsumer)
dataStream.map(line => {
val data = JSON.parseObject(line, classOf[SdkData])
data
})
.filter(line =>
line.key == "activate" || line.key == "register" || line.key == "start_app" || line.key == "login"
)
.keyBy(data => data.key)
.timeWindowAll(Time.seconds(3))
.apply(new SdkWindow)
.addSink(new MySink)
env.execute("stream