2.1 搭建 maven 工程 FlinkTutorial
2.1.1 pom 文件
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.atguigu</groupId>
    <artifactId>FlinkTutorial</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <!-- Flink Scala API (batch / DataSet) -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_2.12</artifactId>
            <version>1.10.1</version>
        </dependency>
        <!-- Flink Scala API (streaming / DataStream) -->
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_2.12</artifactId>
            <version>1.10.1</version>
        </dependency>
        <!-- Kafka 0.11 connector -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.11_2.12</artifactId>
            <version>1.10.1</version>
        </dependency>
        <!-- Redis connector (Bahir; only published for Scala 2.11, so its
             transitive flink-streaming-java_2.11 is excluded to avoid a
             Scala-version clash with the 2.12 artifacts above) -->
        <dependency>
            <groupId>org.apache.bahir</groupId>
            <artifactId>flink-connector-redis_2.11</artifactId>
            <version>1.0</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.flink</groupId>
                    <artifactId>flink-streaming-java_2.11</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- Elasticsearch 6 connector -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-elasticsearch6_2.12</artifactId>
            <version>1.10.1</version>
        </dependency>
        <!-- JDBC driver for the MySQL sink examples -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.44</version>
        </dependency>
        <!-- RocksDB state backend -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb_2.12</artifactId>
            <version>1.10.1</version>
        </dependency>
        <!-- Table API / SQL: legacy planner -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.12</artifactId>
            <version>1.10.1</version>
        </dependency>
        <!-- Table API / SQL: Blink planner -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_2.12</artifactId>
            <version>1.10.1</version>
        </dependency>
        <!-- CSV format support -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-csv</artifactId>
            <version>1.10.1</version>
        </dependency>
        <!-- JSON format support -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>1.10.1</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Compiles Scala sources to class files -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>4.4.0</version>
                <executions>
                    <execution>
                        <!-- Bound to Maven's compile phase -->
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- Builds a fat jar with all dependencies for cluster submission -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.3.0</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
2.1.2 添加 scala 框架和 scala 文件夹
2.2 批处理 wordcount
package com.atguigu.wc

import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.flink.api.scala._

/**
 * Copyright (c) 2018-2028 尚硅谷 All Rights Reserved
 *
 * Project: FlinkTutorial
 * Package: com.atguigu.wc
 * Version: 1.0
 *
 * Created by wushengran on 2020/8/4 11:38
 */
// Batch word count: reads a text file, splits it into words, and counts
// occurrences per word using the DataSet API.
object WordCount {
  def main(args: Array[String]): Unit = {
    // Create a batch execution environment
    val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment

    // Input path is configurable via `--input <path>`; falls back to the
    // original hard-coded sample file so existing invocations keep working
    val params: ParameterTool = ParameterTool.fromArgs(args)
    val inputPath: String = params.get("input",
      "D:\\Projects\\BigData\\FlinkTutorial\\src\\main\\resources\\hello.txt")
    val inputDataSet: DataSet[String] = env.readTextFile(inputPath)

    // Tokenize, drop empty tokens (consecutive spaces would otherwise yield
    // "" entries — keeps behavior consistent with StreamWordCount),
    // group by word, and sum the counts
    val resultDataSet: DataSet[(String, Int)] = inputDataSet
      .flatMap(_.split(" "))
      .filter(_.nonEmpty)
      .map((_, 1))
      .groupBy(0) // key on the word (first tuple field)
      .sum(1)     // sum the per-word counts (second tuple field)

    // Print the results to stdout (triggers execution in the DataSet API)
    resultDataSet.print()
  }
}
注意:Flink 程序支持 java 和 scala 两种语言,本课程中以 scala 语言为主。在引入包时,如果同时存在 java 和 scala 两种包,注意要使用 scala 的包。
2.3 流处理 wordcount
package com.atguigu.wc

import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.streaming.api.scala._

/**
 * Copyright (c) 2018-2028 尚硅谷 All Rights Reserved
 *
 * Project: FlinkTutorial
 * Package: com.atguigu.wc
 * Version: 1.0
 *
 * Created by wushengran on 2020/8/4 14:05
 */
// Streaming word count: reads a socket text stream and maintains a running
// count per word using the DataStream API.
object StreamWordCount {
  def main(args: Array[String]): Unit = {
    // Create a streaming execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // env.setParallelism(8)
    // env.disableOperatorChaining()

    // Extract socket host and port from the command line (`--host`/`--port`).
    // Defaults prevent an NPE / NoSuchElementException when the arguments are
    // omitted; 7777 matches the `nc -lk 7777` test command in the notes.
    val paramTool: ParameterTool = ParameterTool.fromArgs(args)
    val host: String = paramTool.get("host", "localhost")
    val port: Int = paramTool.getInt("port", 7777)

    // Connect to the socket text stream
    val inputDataStream: DataStream[String] = env.socketTextStream(host, port)

    // Tokenize, drop empty tokens, key by word, and keep a running sum
    val resultDataStream: DataStream[(String, Int)] = inputDataStream
      .flatMap(_.split(" "))
      .filter(_.nonEmpty)
      .map((_, 1))
      .keyBy(0) // key on the word (first tuple field)
      .sum(1)   // running count per word

    // Print with parallelism 1 so output lines are not interleaved
    resultDataStream.print().setParallelism(1)

    // Start the streaming job (blocks until the job terminates)
    env.execute("stream word count")
  }
}
测试——在 linux 系统中用 netcat 命令进行发送测试:
nc -lk 7777