1. Java version: batch WordCount
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.operators.FlatMapOperator;
import org.apache.flink.api.java.operators.MapOperator;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
public class flinkWordCountBatchJava {
public static void main(String[] args) throws Exception {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSource<String> dataSource = env.readTextFile("F://word.txt");
FlatMapOperator<String, String> flatMapString = dataSource.flatMap(new FlatMapFunction<String, String>() {
public void flatMap(String s, Collector<String> collector) {
String[] words = s.split(" ");
for (String word : words) {
collector.collect(word);
}
}
});
MapOperator<String, Tuple2<String,Long>> mapTuple = flatMapString.map(new MapFunction<String, Tuple2<String,Long>>() {
public Tuple2<String, Long> map(String s){
return new Tuple2<String, Long>(s, 1L);
}
});
AggregateOperator<Tuple2<String,Long>> sum = mapTuple.groupBy(0).sum(1);
sum.print();
}
}
2. Java version: stream WordCount
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
public class flinkWordCountStreamJava {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStreamSource<String> dataStreamSource = env.socketTextStream("192.168.169.128", 9000, "\n");
SingleOutputStreamOperator<Tuple2<String, Integer>> tuple2SingleOutputStreamOperator = dataStreamSource.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) throws Exception {
for (String s1 : s.split(" ")) {
collector.collect(new Tuple2<String, Integer>(s1, 1));
}
}
});
SingleOutputStreamOperator<Tuple2<String, Integer>> sum = tuple2SingleOutputStreamOperator.keyBy(0).sum(1);
sum.print();
env.execute();
}
}
3. Scala version: batch WordCount
import org.apache.flink.api.scala.ExecutionEnvironment
/**
 * Batch WordCount example using the Flink DataSet API (Scala).
 *
 * Reads a text file, splits each line on single spaces, and prints the
 * per-word counts. The input path defaults to "F://word.txt" but can be
 * overridden with the first command-line argument.
 */
object flinkWordCountBatch {
  def main(args: Array[String]): Unit = {
    // Allow the input path to be supplied on the command line;
    // keep the original hard-coded path as the default.
    val inputPath = if (args.nonEmpty) args(0) else "F://word.txt"

    // Set up the batch execution environment and the file source.
    val env = ExecutionEnvironment.getExecutionEnvironment
    val fileSource = env.readTextFile(inputPath)

    // Brings the implicit TypeInformation instances required by the
    // Scala DataSet API into scope — without this the transformations
    // below will not compile.
    import org.apache.flink.api.scala._

    fileSource
      .flatMap(_.split(" ")) // one element per word
      .map((_, 1))           // pair each word with an initial count of 1
      .groupBy(0)            // group by the word (tuple field 0)
      .sum(1)                // sum the counts (field 1)
      .print()               // print() triggers execution for the DataSet API
  }
}
4. Scala version: stream WordCount
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
/**
 * Streaming WordCount example using the Flink DataStream API (Scala).
 *
 * Connects to a socket text source, splits each line on single spaces,
 * and prints a running per-word count. Host and port default to the
 * original hard-coded values but can be overridden via args(0)/args(1).
 */
object flinkWordCountStream {
  def main(args: Array[String]): Unit = {
    // Allow host/port to be supplied on the command line;
    // the original hard-coded values remain the defaults.
    val host = if (args.length > 0) args(0) else "192.168.169.128"
    val port = if (args.length > 1) args(1).toInt else 9000

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val socketSource = env.socketTextStream(host, port, '\n')

    // Brings the implicit TypeInformation instances required by the
    // Scala DataStream API into scope — without this the transformations
    // below will not compile.
    import org.apache.flink.api.scala._

    socketSource
      .flatMap(_.split(" ")) // one element per word
      .map((_, 1))           // pair each word with an initial count of 1
      .keyBy(0)              // key by the word (tuple field 0)
      .sum(1)                // running sum of the counts (field 1)
      .print()

    // Streaming jobs only run once execute() is called; give the job a
    // descriptive name for the web UI/logs.
    env.execute("Scala stream WordCount")
  }
}