目录
flink wordcount scala代码
pom配置
<dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-scala_2.12</artifactId> <version>1.12.1</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala --> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-streaming-scala_2.12</artifactId> <version>1.12.1</version> <scope>provided</scope> </dependency> 注:flink1.11及以上版本,需要加上flink-clients依赖,不然执行DataStream API时报错 java.lang.IllegalStateException: No ExecutorFactory found to execute the application <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients --> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-clients_2.12</artifactId> <version>1.12.1</version> </dependency>
批处理wordcount
import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.flink.api.scala._
/**批处理 wordcount*/
/** Batch wordcount built on Flink's DataSet API. */
object WordCount {
  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment
    // One String element per line of the input file (path relative to project root).
    val lines = env.readTextFile("src\\main\\resources\\hadoop.txt")
    // Tokenize on single spaces, pair every word with 1, then group by the word
    // (tuple field 0 — grouping needs a key/value shape, hence the map to (_, 1))
    // and sum the counts (tuple field 1).
    val counts = lines
      .flatMap(line => line.split(" "))
      .map(word => (word, 1))
      .groupBy(0)
      .sum(1)
    counts.print()
  }
}
流处理wordcount
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.api.scala._
/** Streaming wordcount: reads comma-separated tokens from a socket. */
object StreamWordCount {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Unbounded source: one String element per line received on the socket.
    val source = env.socketTextStream("192.168.101.51", 7777)
    // Tokenize on commas, pair each token with 1, key by the word, keep a running sum.
    // keyBy(_._1) replaces the index-based keyBy(0): the field-index/expression
    // variants of keyBy are deprecated as of Flink 1.11/1.12 in favor of KeySelector
    // functions, which are type-safe and keep the key type (String) explicit.
    val counts = source
      .flatMap(_.split(","))
      .map((_, 1))
      .keyBy(_._1)
      .sum(1)
    counts.print()
    // StreamExecutionEnvironment builds the job graph lazily; nothing runs
    // until execute() is called.
    env.execute("STREAM")
  }
}
输出:
flink wordcount java代码
pom依赖
<properties>
<flink.version>1.12.0</flink.version>
<java.version>1.8</java.version>
<scala.binary.version>2.11</scala.binary.version>
<slf4j.version>1.7.30</slf4j.version>
</properties>
<dependencies>
<!-- DataSet (batch) API for Java -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- DataStream API; artifact is suffixed with the Scala binary version -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Required on Flink 1.11+ to run jobs locally; without it the
     DataStream API fails with "No ExecutorFactory found" -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Embedded web UI for locally-run jobs -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime-web_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Logging: SLF4J API plus a log4j 1.x backend -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>${slf4j.version}</version>
</dependency>
<!-- Routes log4j2 calls (used internally by some deps) onto SLF4J -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-to-slf4j</artifactId>
<version>2.14.0</version>
</dependency>
</dependencies>
批处理wordcount
/** Batch wordcount with the DataSet API: read a file, count words, print. */
public static void getWordCountBatch() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // Source: one String element per line of the input file.
    DataSource<String> lines = env.readTextFile("D:\\flink2021\\src\\main\\resources\\wordcount");
    // Split each line into individual words.
    FlatMapOperator<String, String> words = lines.flatMap(new myFlatMapFunction());
    // Pair each word with a count of 1. The explicit returns(...) hint is needed
    // because Java erases the lambda's Tuple2 generic parameters at compile time.
    MapOperator<String, Tuple2<String, Integer>> pairs = words
            .map((MapFunction<String, Tuple2<String, Integer>>) word -> Tuple2.of(word, 1))
            .returns(Types.TUPLE(Types.STRING, Types.INT));
    // Group by the word (tuple field 0) and sum the counts (tuple field 1).
    UnsortedGrouping<Tuple2<String, Integer>> grouped = pairs.groupBy(0);
    AggregateOperator<Tuple2<String, Integer>> counts = grouped.sum(1);
    counts.print();
}
/** Splits each incoming line on single spaces and emits one record per token. */
public static class myFlatMapFunction implements FlatMapFunction<String, String> {
    @Override
    public void flatMap(String line, Collector<String> out) throws Exception {
        for (String token : line.split(" ")) {
            out.collect(token);
        }
    }
}
流处理wordCount
/** Streaming wordcount: same file source, processed through the DataStream API. */
public static void getWordCountStream() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // Bounded source: one String element per line of the input file.
    DataStreamSource<String> lines = env.readTextFile("D:\\flink2021\\src\\main\\resources\\wordcount");
    // Turn each line into (word, 1) pairs.
    SingleOutputStreamOperator<Tuple2<String, Integer>> pairs = lines.flatMap(new flatMapFunc());
    // Key the stream by the word (tuple field f0). An anonymous KeySelector is
    // used so the key type (String) is fully visible to Flink's type extraction.
    KeyedStream<Tuple2<String, Integer>, String> keyed = pairs.keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
        @Override
        public String getKey(Tuple2<String, Integer> pair) throws Exception {
            return pair.f0;
        }
    });
    // Running sum per key; execute() is what actually launches the job.
    keyed.sum(1).print();
    env.execute();
}
/** Splits each line on single spaces and emits a (word, 1) pair per token. */
public static class flatMapFunc implements FlatMapFunction<String, Tuple2<String, Integer>> {
    @Override
    public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
        for (String token : line.split(" ")) {
            out.collect(Tuple2.of(token, 1));
        }
    }
}