1.搭建maven工程 flink-2019
1.2 pom文件
<?xml version="1.0" encoding="UTF-8"?> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion>
<groupId>com.atguigu.flink</groupId> <artifactId>flink</artifactId> <version>1.0-SNAPSHOT</version>
<dependencies> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-scala_2.11</artifactId> <version>1.7.0</version> </dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala --> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-streaming-scala_2.11</artifactId> <version>1.7.0</version> </dependency> </dependencies> <build> <plugins> <!-- 该插件用于将Scala代码编译成class文件 --> <plugin> <groupId>net.alchim31.maven</groupId> <artifactId>scala-maven-plugin</artifactId> <version>3.4.6</version> <executions> <execution> <!-- 声明绑定到maven的compile阶段 --> <goals> <goal>compile</goal> <goal>testCompile</goal> </goals> </execution> </executions> </plugin>
<plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-assembly-plugin</artifactId> <version>3.0.0</version> <configuration> <descriptorRefs> <descriptorRef>jar-with-dependencies</descriptorRef> </descriptorRefs> </configuration> <executions> <execution> <id>make-assembly</id> <phase>package</phase> <goals> <goal>single</goal> </goals> </execution> </executions> </plugin> </plugins> </build> </project>
1.3 添加scala框架 和 scala文件夹
2 批处理wordcount
package com.imau.edu.flink
import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.flink.api.scala._
/**
 * Batch WordCount using the Flink DataSet API.
 *
 * Reads a text file, splits each line on single spaces and counts the
 * occurrences of every word. The input path may be supplied as the first
 * program argument; otherwise the original hard-coded default is used.
 */
object DataSet_WordCount {
  def main(args: Array[String]): Unit = {
    // 1. env: obtain the batch execution environment
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Input path is configurable; falls back to the original default path.
    val inputPath = if (args.nonEmpty) args(0) else "D:\\flinkdemo\\hello.txt"

    // 2. source: read the input file line by line
    val lines = env.readTextFile(inputPath)

    // 3. transform: split -> drop empty tokens -> pair with 1 -> group -> sum.
    // In Flink, Spark's reduceByKey is split into groupBy + an aggregation;
    // the empty-token filter keeps this consistent with the streaming job
    // and avoids counting "" produced by repeated/leading/trailing spaces.
    val wordCounts = lines
      .flatMap(_.split(" "))
      .filter(_.nonEmpty)
      .map((_, 1))
      .groupBy(0)
      .sum(1)

    // 4. sink: print() also triggers execution for the DataSet API
    wordCounts.print()
  }
}
3 流处理 wordcount
package com.imau.edu.flink
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.api.scala._
/**
 * Streaming WordCount using the Flink DataStream API.
 *
 * Connects to a socket text source (simulate input with `nc -lk 7777`),
 * splits each line on single spaces and maintains a running count per word.
 * Host and port may be supplied as program arguments (args(0), args(1));
 * they default to the original hard-coded values otherwise.
 */
object DataStream_WordCount {
  def main(args: Array[String]): Unit = {
    // env: obtain the streaming execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Host/port are configurable; fall back to the original defaults.
    val host = if (args.length > 0) args(0) else "Faded103"
    val port = if (args.length > 1) args(1).toInt else 7777

    // source: one record per line read from the socket
    val lines = env.socketTextStream(host, port)

    // transform: split -> drop empty tokens -> pair -> key by word -> running sum
    val wordCounts = lines
      .flatMap(_.split(" "))
      .filter(_.nonEmpty)
      .map((_, 1))
      .keyBy(0)
      .sum(1)

    // sink: print every updated (word, count) record
    wordCounts.print()

    // A streaming job only starts once execute() is called;
    // an explicit job name makes it identifiable in the Flink UI.
    env.execute("DataStream_WordCount")
  }
}