创建Maven项目并导入POM依赖
<dependencies>
<!-- Flink Scala API used by the batch (DataSet) example; the _2.11 suffix is the Scala binary version -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_2.11</artifactId>
<version>1.10.0</version>
</dependency>
<!-- Flink Scala streaming API used by the DataStream example -->
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>1.10.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- This plugin compiles the Scala sources into class files -->
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.4.6</version>
<executions>
<execution>
<!-- Declare the compile goal so it is bound to Maven's compile phase -->
<goals>
<goal>compile</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Packages the project together with its dependencies into one jar (jar-with-dependencies descriptor) -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.0.0</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<!-- Run the assembly plugin's "single" goal during the package phase -->
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
添加Scala框架支持和scala源码文件夹
添加data.txt
flink\src\main\resources\data.txt
hello world
hello spark
hello scala you
hello flink yes hao are you
编写批处理 WordCount
scala\com\atguigu\wordcount\Wordcount.scala
// Implicit conversions required by the Scala DataSet API operators (flatMap, map, ...)
import org.apache.flink.api.scala._
/**
 * Batch word count.
 *
 * Reads a text file line by line, splits each line on single spaces and prints
 * every distinct word together with the number of times it occurs.
 *
 * The input file is taken from the first command-line argument when one is
 * supplied; without arguments the original sample path is used, so existing
 * run configurations keep working.
 */
object Wordcount {
  def main(args: Array[String]): Unit = {
    // Create the batch execution environment.
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Sample file used when no path is passed on the command line.
    val defaultInputPath = "D:\\MyWork\\WorkSpaceIDEA\\flink\\src\\main\\resources\\data.txt"
    val inputPath = args.headOption.getOrElse(defaultInputPath)

    // Read the input file as a data set of lines.
    val inputDS: DataSet[String] = env.readTextFile(inputPath)

    // Tokenize, pair every word with a count of 1, group by the word
    // (tuple field 0) and sum the counts (tuple field 1).
    val wordCountDS: AggregateDataSet[(String, Int)] = inputDS
      // Leading or consecutive spaces make split(" ") emit empty strings;
      // drop them so "" is not reported as a word.
      .flatMap(_.split(" ").filter(_.nonEmpty))
      .map((_, 1))
      .groupBy(0)
      .sum(1)

    // Print the result.
    wordCountDS.print()
  }
}
编写流处理 WordCount
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
/**
 * Streaming word count.
 *
 * Connects to a text socket, splits every received line on single spaces and
 * continuously prints the running count of each word.
 *
 * Both `host` and `port` must be supplied as program arguments
 * (e.g. `--host localhost --port 7777`); the job fails at startup when either
 * one is missing.
 */
object StreamWordCount {
  def main(args: Array[String]): Unit = {
    // Read the connection settings from the command-line arguments.
    val params: ParameterTool = ParameterTool.fromArgs(args)
    // getRequired fails immediately and names the missing key, instead of
    // handing a null host on to socketTextStream.
    val host: String = params.getRequired("host")
    val port: Int = params.getInt("port")

    // Create the streaming execution environment.
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Receive the socket text stream.
    val textDstream: DataStream[String] = env.socketTextStream(host, port)

    // Implicit conversions needed by flatMap and map.
    import org.apache.flink.api.scala._

    val dataStream: DataStream[(String, Int)] = textDstream
      // Leading or consecutive spaces make split(" ") emit empty strings;
      // drop them so "" is not counted as a word.
      .flatMap(_.split(" ").filter(_.nonEmpty))
      .map((_, 1))
      // Key by the word (tuple field 0) and keep a running sum of field 1.
      .keyBy(0)
      .sum(1)

    // Print with a parallelism of 1.
    dataStream.print().setParallelism(1)

    // Start the executor and run the job.
    env.execute("Socket stream word count")
  }
}