Flink1.12 流批一体Hello-world


环境说明:

java: 1.8

flink: 1.12.2

开发工具:IDEA(Maven 项目)

要开发flink程序,首先,我们需要引入依赖,必要依赖POM.xml文件如下

(1)核心依赖

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>flink-learn-1-demo</artifactId>
    <version>1.0</version>
    <!-- Extra repositories, in order: aliyun mirror, apache snapshots, cloudera.
         NOTE: all URLs must be https - Maven 3.8.1+ blocks plain-http repositories
         by default (the original aliyun entry used http and would fail to resolve). -->
    <repositories>
        <repository>
            <id>aliyun</id>
            <url>https://maven.aliyun.com/nexus/content/groups/public/</url>
        </repository>
        <repository>
            <id>apache</id>
            <url>https://repository.apache.org/content/repositories/snapshots/</url>
        </repository>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>

    <properties>
        <encoding>UTF-8</encoding>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <java.version>1.8</java.version>
        <!-- Scala binary version suffix used by the Flink artifacts below. -->
        <scala.version>2.12</scala.version>
        <flink.version>1.12.2</flink.version>
    </properties>
    <dependencies>
        <!-- Local execution / job submission client. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Core Java APIs (types, functions). -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- DataStream API (unified stream/batch since 1.12). -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
    </dependencies>

    <build>
        <sourceDirectory>src/main/java</sourceDirectory>
        <plugins>
            <!-- Compiler plugin: target Java 8. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.5.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <!--<encoding>${project.build.sourceEncoding}</encoding>-->
                </configuration>
            </plugin>
            <!-- Surefire: run *Test / *Suite classes, report to console only. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.18.1</version>
                <configuration>
                    <useFile>false</useFile>
                    <disableXmlReport>true</disableXmlReport>
                    <includes>
                        <include>**/*Test.*</include>
                        <include>**/*Suite.*</include>
                    </includes>
                </configuration>
            </plugin>
            <!-- Shade plugin: build a fat jar containing all dependencies. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <!-- Strip dependency signature files; stale signatures
                                         in a merged jar cause SecurityException at runtime. -->
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <!-- Main-Class for the jar manifest (optional). -->
                                    <mainClass></mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

(2)流批一体测试

package com.leilei;

import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

/**
 * @author lei
 * @version 1.0
 * @date 2021/3/7 15:51
 * @desc 单词计数 DataStream 匿名内部类
 */
/**
 * Word-count hello-world on the unified DataStream API (Flink 1.12),
 * with every operator implemented as an anonymous inner class.
 *
 * <p>Flip the {@code RuntimeExecutionMode} between STREAMING, BATCH and
 * AUTOMATIC to observe how the same pipeline behaves in each mode.
 *
 * @author lei
 * @version 1.0
 * @date 2021/3/7 15:51
 */
public class WordCountDataStream1 {
    public static void main(String[] args) throws Exception {
        // 1. Obtain the execution environment.
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        // Run in streaming mode; results are emitted incrementally per record.
        environment.setRuntimeMode(RuntimeExecutionMode.STREAMING);

        // 2. Build a bounded source from in-memory elements.
        DataStreamSource<String> lines = environment.fromElements("java,scala,php,c++",
                "java,scala,php", "java,scala", "java");

        // 3a. Split each comma-separated line into individual words.
        SingleOutputStreamOperator<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String line, Collector<String> collector) throws Exception {
                for (String token : line.split(",")) {
                    collector.collect(token);
                }
            }
        });

        // 3b. Pair each word with an initial count of one.
        SingleOutputStreamOperator<Tuple2<String, Integer>> pairs = words.map(new MapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> map(String token) throws Exception {
                return Tuple2.of(token, 1);
            }
        });

        // 3c. Group the pairs by the word itself (tuple field 0).
        KeyedStream<Tuple2<String, Integer>, String> grouped = pairs.keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
            @Override
            public String getKey(Tuple2<String, Integer> pair) throws Exception {
                return pair.f0;
            }
        });

        // 3d. Aggregate the per-word counts (tuple field 1).
        SingleOutputStreamOperator<Tuple2<String, Integer>> counts = grouped.sum(1);

        // 4. Sink: print the results to stdout.
        counts.print();

        // 5. Trigger job execution.
        environment.execute("flink-hello-world");
    }
}

上文中,指定flink程序从自定义元素中加载数据源,且指定处理模式为流模式STREAMING(但是,我们都知道,我们这里的数据源数据本质上是有界的,因为个数我们都看得见数的清啊…)

env.setRuntimeMode(RuntimeExecutionMode.STREAMING);

image-20210331224907713

结论:数据来一个处理一个,输出结果中有依次累加的中间计算过程,无疑是流处理方式。


设置运行模式为:批处理

env.setRuntimeMode(RuntimeExecutionMode.BATCH);

image-20210331225121567

数据以批方式进行处理,每项数据合并转换后,均只输出了最终结果,无中途流程数据输出


设置运行模式为:自动识别

env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

image-20210331225348701

结论:数据以批方式进行处理,每项数据合并转换后,均只输出了最终结果,无中途流程数据输出。


综上,可以看出 Flink 1.12 已经实现了流批的自动切换!如果有流处理与批处理逻辑相同的场景,使用 Flink 可节省开发时间——只需一套代码、一套计算框架即可搞定!

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
上百节课视频详细讲解,需要的小伙伴自行百度网盘下载,链接见附件,永久有效。 共课程包含9个章节:Flink安装部署与快速入门、Flink批处理API、Flink流处理API、Flink高级API、Flink-Table与SQL、Flink-Action综合练习、Flink-高级特性和新特性、Flink多语言开发、Flink性能调优 课程目录: Flink-day01 00-[了解]-课程介绍 01-[了解]-Flink概述 02-[掌握]-Flink安装部署-local本地模式 03-[掌握]-Flink安装部署-Standalone独立集群模式 04-[掌握]-Flink安装部署-Standalone-HA高可用集群模式 05-[重点]-Flink安装部署-On-Yarn-两种提交模式 06-[重点]-Flink安装部署-On-Yarn-两种提交模式-演示 07-[了解]-Flink入门案例-前置说明 08-[掌握]-Flink入门案例-环境准备 09-[掌握]-Flink入门案例-代码实现-1-DataSet 10-[掌握]-Flink入门案例-代码实现-2-DataStream流批一体-匿名内部类版 11-[掌握]-Flink入门案例-代码实现-2-DataStream流批一体-Lambda版 12-[掌握]-Flink入门案例-代码实现-2-DataStream流批一体-On-Yarn 13-[掌握]-Flink原理初探-角色分工-执行流程-DataFlow 14-[掌握]-Flink原理初探-TaskSlot和TaskSlotSharing 15-[掌握]-Flink原理初探-执行流程图生成 Flink-day02 01-[理解]-流处理核心概念说明 02-[掌握]-Source-基于集合 03-[掌握]-Source-基于文件 04-[掌握]-Source-基于Socket 05-[掌握]-Source-自定义Source-随机生成订单数据 06-[掌握]-Source-自定义Source-实时加载MySQL数据 07-[掌握]-Source-Transformation-基本操作 08-[掌握]-Source-Transformation-合并和连接 09-[掌握]-Source-Transformation-拆分和选择 10-[掌握]-Source-Transformation-重平衡分区 11-[掌握]-Source-Transformation-其他分区 12-[掌握]-Source-Sink-基于控制台和文件 13-[掌握]-Source-Sink-自定义Sink 14-[了解]-Connectors-JDBC 15-[重点]-Connectors-Flink整合Kafka-Source 16-[重点]-Connectors-Flink整合Kafka-Sink-实时ETL 17-[了解]-Connectors-Redis Flink-day03 01-[了解]-Flink高级API-四大基石介绍 02-[了解]-Flink高级API-Window-分类和API介绍 03-[掌握]-Flink高级API-Window-基于时间的滑动和滚动窗口 04-[了解]-Flink高级API-Window-基于数量的滑动和滚动窗口 05-[了解]-Flink高级API-Window-会话窗口 06-[理解]-Flink高级API-Time-时间分类和事件时间的重要性及Watermaker的引入 07-[理解]-Flink高级API-Time-Watermaker概念详解 08-[理解]-Flink高级API-Time-Watermaker图解 09-[掌握]-Flink高级API-Time-Watermaker-代码演示 10-[了解]-Flink高级API-Time-Watermaker-代码演示-理论验证 11-[掌握]-Flink高级API-Time-Watermaker-outputTag-allowedlateness解决数据丢失 12-[了解]-Flink高级API-State-Flink中状态的自动管理 13-[了解]-Flink高级API-State-有状态计算和无状态计算 14-[了解]-Flink高级API-State-状态分类 15-[了解]-Flink高级API-State-keyState代码演示 16-[了解]-Flink高级API-State-OperatorState代码演示 Flink-day04-07等等

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值