Setting a TTL (expiration time) on Flink keyed state, recovering from HDFS checkpoints, and configuring flink-conf.yaml

Dependencies

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>1.8.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>1.8.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_2.11</artifactId>
            <version>1.8.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_2.11</artifactId>
            <version>1.8.1</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.26</version>
        </dependency>
        <!-- HDFS support -->

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.9.2</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.9.2</version>
        </dependency>

        <!-- Kafka support -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.11</artifactId>
            <version>1.8.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>1.8.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-filesystem_2.11</artifactId>
            <version>1.8.1</version>
        </dependency>
        <!-- Redis sink support -->
        <dependency>
            <groupId>org.apache.bahir</groupId>
            <artifactId>flink-connector-redis_2.11</artifactId>
            <version>1.0</version>
        </dependency>

    </dependencies>
    <build>
        <plugins>
            <!-- Compile the Scala sources into the jar during `mvn package` -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>4.0.1</version>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
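
The build above only compiles the Scala sources into the jar; for cluster submission you typically also want the dependencies bundled into a fat jar. A minimal sketch using maven-shade-plugin, added next to the scala-maven-plugin in the <plugins> section (the version is an assumption; adjust it and add relocation/filter rules as needed):

            <!-- hypothetical addition: build a fat jar on `mvn package` -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.1</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>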
Code
package com.baizhi.demo04

import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.environment.CheckpointConfig.ExternalizedCheckpointCleanup
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _}

object Flink {
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    //enable checkpointing: trigger a checkpoint every 7s with exactly-once semantics
    env.enableCheckpointing(7000, CheckpointingMode.EXACTLY_ONCE)
    //a checkpoint must complete within 4s, otherwise it is aborted
    env.getCheckpointConfig.setCheckpointTimeout(4000)
    //wait at least 5s after a checkpoint completes before starting the next one
    env.getCheckpointConfig.setMinPauseBetweenCheckpoints(5000)
    //allow at most one checkpoint in flight at a time
    env.getCheckpointConfig.setMaxConcurrentCheckpoints(1)
    //when the job is cancelled, retain the checkpoint data instead of deleting it
    env.getCheckpointConfig.enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION)
    //fail the job if a checkpoint cannot be completed, rather than silently skipping it
    env.getCheckpointConfig.setFailOnCheckpointingErrors(true)
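    // (sketch) alternatively, the state backend can be chosen per job right here
    // instead of cluster-wide in flink-conf.yaml; this assumes
    // flink-statebackend-rocksdb_2.11 is on the classpath
    // (import org.apache.flink.contrib.streaming.state.RocksDBStateBackend):
    // env.setStateBackend(new RocksDBStateBackend("hdfs:///flink-checkpoints", true))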

    // read lines from a socket; start `nc -lk 9999` on host "Flink" first
    val data: DataStream[String] = env.socketTextStream("Flink", 9999)

    data.flatMap(line => line.split("\\s+"))
      .map((_, 1))
      .keyBy(0)                   // key by the word itself
      .map(new CountMapFunction)  // stateful running count per key (state has a TTL, see below)
      .print()

    env.execute()
  }

}
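
Because of RETAIN_ON_CANCELLATION, the checkpoint data under state.checkpoints.dir survives a job cancel, and the job can later be resumed from it with the -s flag of the Flink CLI. A sketch (the jar name flink-demo.jar and the <job-id>/chk-<n> path components are placeholders; list HDFS to find the real ones):

# checkpoints are laid out as <state.checkpoints.dir>/<job-id>/chk-<n>
hdfs dfs -ls /flink-checkpoints

# resume the job from a retained externalized checkpoint
bin/flink run -s hdfs:///flink-checkpoints/<job-id>/chk-<n> \
  -c com.baizhi.demo04.Flink flink-demo.jar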

package com.baizhi.demo04

import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.api.common.state.{StateTtlConfig, ValueState, ValueStateDescriptor}
import org.apache.flink.api.common.time.Time
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._

class CountMapFunction extends RichMapFunction[(String, Int), (String, Int)] {
  var state: ValueState[Int] = _

  override def map(value: (String, Int)): (String, Int) = {
    // value() yields 0 for an empty (or expired) ValueState[Int]:
    // the backing null unboxes to the Int default, so no null check is needed
    val history = state.value()
    val total = history + value._2
    state.update(total)
    (value._1, total)
  }

  override def open(parameters: Configuration): Unit = {
    val dec = new ValueStateDescriptor[Int]("count", createTypeInformation[Int])
    //1. build the TTL config
    val ttlConfig = StateTtlConfig
      .newBuilder(Time.seconds(80)) //state entries expire 80s after the last update
      .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite) //reset the TTL timer on creation and every write (reads do not refresh it)
      .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired) //never return expired state, even if it has not been cleaned up yet
      //.cleanupInRocksdbCompactFilter(1000) //clean up expired entries during RocksDB compaction, refreshing the filter's timestamp every 1000 processed entries (needs the flag set in flink-conf.yaml below)
      .build

    //2. enable TTL on the state descriptor
    dec.enableTimeToLive(ttlConfig)

    state = getRuntimeContext.getState(dec)
  }
}
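
Besides the RocksDB compaction filter, Flink 1.8 can also drop expired entries whenever a full snapshot is taken. A minimal sketch of that variant (same TTL as above; note it does not shrink local state and is not effective with RocksDB incremental checkpoints, which the flink-conf.yaml below turns on):

// sketch: alternative cleanup strategy, purges expired entries
// whenever a full snapshot (savepoint / full, non-incremental checkpoint) is taken
val snapshotTtlConfig = StateTtlConfig
  .newBuilder(Time.seconds(80))
  .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
  .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
  .cleanupFullSnapshot() // clean up on full snapshots only
  .build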

Configuring the Flink environment
Configure flink-conf.yaml
#==============================================================================
# Fault tolerance and checkpointing
#==============================================================================

# The backend that will be used to store operator state checkpoints if
# checkpointing is enabled.
#
# Supported backends are 'jobmanager', 'filesystem', 'rocksdb', or the
# <class-name-of-factory>.
#
state.backend: rocksdb

# Directory for checkpoints filesystem, when using any of the default bundled
# state backends.
#
state.checkpoints.dir: hdfs:///flink-checkpoints

# Default target directory for savepoints, optional.
#
state.savepoints.dir: hdfs:///flink-savepoints

# Flag to enable/disable incremental checkpoints for backends that
# support incremental checkpoints (like the RocksDB state backend).
#
state.backend.incremental: true

# Enables the TTL compaction filter for the RocksDB backend (used by
# StateTtlConfig#cleanupInRocksdbCompactFilter in the code above).
state.backend.rocksdb.ttl.compaction.filter.enabled: true

#==============================================================================
# HistoryServer
#==============================================================================

# The HistoryServer is started and stopped via bin/historyserver.sh (start|stop)

# Directory to upload completed jobs to. Add this directory to the list of
# monitored directories of the HistoryServer as well (see below).
jobmanager.archive.fs.dir: hdfs:///completed-jobs/

# The address under which the web-based HistoryServer listens.
historyserver.web.address: CentOS

# The port under which the web-based HistoryServer listens.
historyserver.web.port: 8082

# Comma separated list of directories to monitor for completed jobs.
historyserver.archive.fs.dir: hdfs:///completed-jobs/

# Interval in milliseconds for refreshing the monitored directories.
historyserver.archive.fs.refresh-interval: 10000
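
After editing flink-conf.yaml, create the HDFS directories it references and restart the cluster; the HistoryServer is started separately. A sketch, run from the Flink installation directory (the directory names match the config above; everything else is an assumption about your layout):

# create the directories referenced in flink-conf.yaml
hdfs dfs -mkdir -p /flink-checkpoints /flink-savepoints /completed-jobs

# restart the cluster so the new configuration is picked up
bin/stop-cluster.sh
bin/start-cluster.sh

# the HistoryServer is started/stopped via its own script (see the comment above)
bin/historyserver.sh start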


Add HADOOP_CLASSPATH to the environment variables

export HADOOP_CLASSPATH=`hadoop classpath`
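
To make this survive new shells, append it to a profile file and verify it; a sketch (the choice of ~/.bashrc is an assumption, and `hadoop` must be on the PATH):

# persist the variable and reload the shell configuration
echo 'export HADOOP_CLASSPATH=`hadoop classpath`' >> ~/.bashrc
source ~/.bashrc
echo $HADOOP_CLASSPATH    # verify: should print the Hadoop jar/conf paths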
