基于DataSet:
package Com.ZQQQ.Flinkwork;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
/**
 * Word count on a {@code DataSet} (batch processing: the whole bounded
 * input is read and processed as one batch).
 *
 * @author he zhi qiang
 * @since 2021/12/13
 */
public class DataSetWordCount {
public static void main(String[] args) {
// 创建执行环境
ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
String inputpath ="D:\\Conding_pojoect\\MyFlink\\src\\main\\resources\\hello.txt";
DataSet<String> textFile = environment.readTextFile(inputpath);
DataSet<Tuple2<String, Integer>> resutset = textFile.flatMap(new myflatMap())
.groupBy(0) // 按照第一个位置的元组进行分组
.sum(1); // 按照第二个位置元素进行累加求和
try {
resutset.print();
} catch (Exception e) {
e.printStackTrace();
}
}
public static class myflatMap implements FlatMapFunction<String, Tuple2<String,Integer>>{
public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
//按空格进行分词
String[] words = value.split(" ");
for (String word : words) {
out.collect(new Tuple2<String, Integer>(word,1));
}
}
}
}
1、以上程序基于 Maven 项目创建,导入所需 jar 包。
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>MyFlink</artifactId>
    <version>1.0-SNAPSHOT</version>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <!-- Flink 1.9.x requires Java 8; compiling at language
                         level 6 fails against the Flink APIs. -->
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId>
            <version>1.9.3</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.2</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>1.9.3</version>
            <!-- <scope>provided</scope> -->
        </dependency>
    </dependencies>
</project>
2、在resources 目录下创建hello.txt文件,并填充数据。
3、编写代码。
基于DataStream:
package Com.ZQQQ.Flinkwork;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Word count on a {@code DataStream} (stream processing: the text file is
 * read and processed as a stream of records).
 *
 * @author he zhi qiang
 * @since 2021/12/15
 */
public class DataStreamWordCount {

    /**
     * Reads the input file as a stream, counts words and prints running
     * totals, then reports the wall-clock time the whole run took.
     *
     * Fixes two timing bugs in the original: the elapsed time was computed
     * as {@code start - end} (always negative), and it was measured BEFORE
     * {@code environment.execute()} — but nothing runs until execute() is
     * called, so the job itself was never timed.
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();

        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        // Parallelism of 8; if unset, Flink defaults to the number of CPU cores.
        environment.setParallelism(8);

        // Read the text file as a (bounded) stream of lines.
        String inputPath = "D:\\Conding_pojoect\\MyFlink\\src\\main\\resources\\hello.txt";
        DataStream<String> lines = environment.readTextFile(inputPath);

        // Tokenize into (word, 1) pairs, key by the word (tuple field 0)
        // and keep a running sum of the counts (tuple field 1).
        DataStream<Tuple2<String, Integer>> wordCounts = lines
                .flatMap(new DataSetWordCount.myflatMap())
                .keyBy(0)
                .sum(1);
        wordCounts.print();

        // execute() actually launches the job and blocks until it finishes.
        try {
            environment.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }

        long elapsed = System.currentTimeMillis() - start;
        System.out.println("执行任务耗费时间为:" + elapsed);
    }
}