Flink version: 1.10.0
code:
// imports required by this snippet
import java.util.Properties

import com.google.gson.Gson
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.api.common.restartstrategy.RestartStrategies
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.configuration.Configuration
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.environment.CheckpointConfig
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector

// build the execution environment
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.enableCheckpointing(1000 * 60) // checkpoint every 60 s (the interval is given in milliseconds)
env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)
env.setStateBackend(new RocksDBStateBackend(checkpointPath, true)) // RocksDB state backend; the second argument enables incremental checkpoints
env.getCheckpointConfig.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION) // retain checkpoints when the job is cancelled
// on failure, restart the job up to 5 times with a 50 s delay between attempts
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(5, 50000))
// Kafka consumer properties
val properties = new Properties()
properties.setProperty("bootstrap.servers", TEST_BROKERS)
properties.setProperty("group.id", "toHdfs-Parquet-group")
val kafkaConsumer = new FlinkKafkaConsumer[String](LAUNCHS_TOPIC, new SimpleStringSchema(), properties)
val source = env.addSource(kafkaConsumer)
// parse the JSON payload into LogSchema objects
val kafkaSource = source.flatMap(new RichFlatMapFunction[String, LogSchema] {
  var gson: Gson = _

  override def open(parameters: Configuration): Unit = {
    gson = new Gson()
  }

  override def flatMap(value: String, out: Collector[LogSchema]): Unit = {
    // deserialize one JSON record into a LogSchema instance and emit it
    val logSchema = gson.fromJson(value, classOf[LogSchema])
    out.collect(logSchema)
  }
})
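The listing never shows the LogSchema class that Gson deserializes into. A minimal, purely hypothetical stand-in (the real field names and types depend on the JSON actually produced to LAUNCHS_TOPIC) could look like this:

// hypothetical placeholder for the LogSchema class referenced above;
// the fields below are illustrative and must match the keys of the real Kafka JSON payload
class LogSchema {
  var eventTime: String = _
  var userId: String = _
  var action: String = _
}

A plain class with var fields and a no-arg constructor keeps Gson's reflective field mapping straightforward from Scala.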