Zyllink Project: Real-Time ETL

1 pom.xml dependencies

<dependencies>
    <!-- Flink Java API -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- Hadoop dependencies: start -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>${hadoop.version}</version>
        <!-- Exclude the xml-apis artifact pulled in by hadoop-hdfs -->
        <exclusions>
            <exclusion>
                <groupId>xml-apis</groupId>
                <artifactId>xml-apis</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <!-- Hadoop dependencies: end -->
    <!-- Hive JDBC -->
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-jdbc</artifactId>
        <version>${hive.version}</version>
        <exclusions>
            <exclusion>
                <groupId>org.json</groupId>
                <artifactId>json</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.apache.logging.log4j</groupId>
                <artifactId>log4j-1.2-api</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.apache.logging.log4j</groupId>
                <artifactId>log4j-web</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.apache.logging.log4j</groupId>
                <artifactId>log4j-slf4j-impl</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <!-- JSON parsing -->
    <dependency>
        <groupId>org.json</groupId>
        <artifactId>json</artifactId>
        <version>${json.version}</version>
    </dependency>
    <!-- HBase dependencies: start -->
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>${hbase.version}</version>
    </dependency>
    <!-- Phoenix -->
    <dependency>
        <groupId>org.apache.phoenix</groupId>
        <artifactId>phoenix-core</artifactId>
        <version>${phoenix.version}</version>
        <exclusions>
            <exclusion>
                <groupId>org.glassfish</groupId>
                <artifactId>javax.el</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <!-- Flink streaming dependencies: start -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-filesystem_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-hbase_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-statebackend-rocksdb_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-runtime-web_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- Flink streaming dependencies: end -->
    <!-- Kafka client -->
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>${kafka.version}</version>
    </dependency>
    <!-- lombok -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <version>${lombok.version}</version>
    </dependency>
</dependencies>

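The `${...}` version placeholders referenced above are assumed to be defined in a `<properties>` block elsewhere in the pom. A minimal sketch, with purely illustrative version numbers that should be replaced by whatever matches the target cluster:

<properties>
    <!-- Illustrative versions only; align them with the versions running on your cluster -->
    <flink.version>1.9.1</flink.version>
    <hadoop.version>2.7.5</hadoop.version>
    <hive.version>2.1.1</hive.version>
    <hbase.version>2.1.0</hbase.version>
    <phoenix.version>5.0.0-HBase-2.0</phoenix.version>
    <kafka.version>1.0.0</kafka.version>
    <json.version>20190722</json.version>
    <lombok.version>1.18.12</lombok.version>
</properties>
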
Code

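The snippet below reads its environment-specific settings through a ConfigLoader helper that the original post does not show. A minimal sketch of such a helper, assuming the values live in a config.properties file on the classpath (the file name is an assumption):

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

// Sketch of the helper used below; loads config.properties once from the classpath
public class ConfigLoader {

    private static final Properties PROPS = new Properties();

    static {
        try (InputStream in = ConfigLoader.class.getClassLoader().getResourceAsStream("config.properties")) {
            PROPS.load(in);
        } catch (IOException e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    public static String getProperty(String key) {
        return PROPS.getProperty(key);
    }
}
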
// System property so that a local run accesses HDFS as the root user
System.setProperty("HADOOP_USER_NAME", "root");

// 1. Create the Flink streaming environment; configure checkpointing (stored on HDFS),
//    partition discovery, the restart strategy, and internal back-pressure handling
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();

// 1.1 Time characteristic of the stream:
//     ProcessingTime = wall-clock time of the machine processing the event;
//     IngestionTime  = time the event enters Flink;
//     EventTime      = a timestamp field carried in the event itself
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

// 1.2 Enable checkpointing with a 5-minute interval
env.enableCheckpointing(300000);

// 1.3 Checkpointing mode: EXACTLY_ONCE guarantees no loss and no duplication
env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);

// 1.4 Minimum pause between checkpoints, so checkpointing does not run so often that it slows down processing
env.getCheckpointConfig().setMinPauseBetweenCheckpoints(5000);

// 1.5 Checkpoint timeout
env.getCheckpointConfig().setCheckpointTimeout(20000);

// 1.6 Maximum number of concurrent checkpoints (must be >= 1)
env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);

// 1.7 Retain checkpoints when the job is cancelled (by default they are deleted on cancellation)
env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

// 1.8 Do not fail the job when a checkpoint cannot be saved
env.getCheckpointConfig().setFailOnCheckpointingErrors(false);

String hdfsUri = ConfigLoader.getProperty("hdfsUri");

// 1.9 State backend: RocksDBStateBackend with checkpoints stored on HDFS (RocksDB also supports incremental checkpoints)
try {
    env.setStateBackend(new RocksDBStateBackend(hdfsUri + "/flink/checkpoint/KafkaSourceDataTask"));
} catch (IOException e) {
    e.printStackTrace();
}

// Restart strategy: no restarts
env.setRestartStrategy(RestartStrategies.noRestart());
// Alternative: failure-rate restart strategy
// env.setRestartStrategy(RestartStrategies.failureRateRestart(3, Time.minutes(5), Time.minutes(10)));
// Alternative: fixed-delay restart strategy
// env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 600000));

// 2. Create the Flink Kafka consumer together with its Kafka consumer properties
String topic = ConfigLoader.getProperty("kafka.topic");
String brokerServers = ConfigLoader.getProperty("bootstrap.servers");

Properties prop = new Properties();
prop.setProperty("bootstrap.servers", brokerServers);
prop.setProperty("group.id", "KafkaSourceDataTask");
// Kafka partition discovery, checked every 30 seconds
prop.setProperty("flink.partition-discovery.interval-millis", "30000");
// Key deserializer
prop.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
// Value deserializer
prop.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
// When the group has no committed offset, start reading from the earliest offset
prop.setProperty("auto.offset.reset", "earliest");

FlinkKafkaConsumer011<String> kafkaConsumer = new FlinkKafkaConsumer011<String>(topic, new SimpleStringSchema(), prop);

// Start consuming from the consumer group's committed offsets (falling back to auto.offset.reset if none exist)
kafkaConsumer.setStartFromGroupOffsets();
// Commit offsets back to Kafka as part of Flink checkpoints
kafkaConsumer.setCommitOffsetsOnCheckpoints(true);

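The post stops after configuring the consumer. A minimal sketch, not taken from the source, of how the source might be attached and the job submitted; the actual ETL transformations and the HBase/Phoenix sink are project-specific and only indicated by a comment:

// Attach the Kafka source to the streaming environment
DataStreamSource<String> kafkaStream = env.addSource(kafkaConsumer);

// The project's ETL logic (parsing, cleansing, writing to HBase/Phoenix) would go here,
// e.g. kafkaStream.map(...).addSink(...); omitted because it is not shown in the original

// Submit the job; the name simply reuses the task name from the checkpoint path above
env.execute("KafkaSourceDataTask");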