package com.hmi1024.flink.example;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.*;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
/**
 * Batch word count: reads a text file, splits every line into words and prints
 * how many times each word occurs.
 *
 * <p>Usage: {@code BatchWordCount [inputPath]}. When no argument is given the
 * default path {@value #DEFAULT_INPUT_PATH} is read.
 */
public class BatchWordCount {

    /** Input file read when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH = "./data/input/wordcount.txt";

    /**
     * Builds the word-count pipeline and prints the result.
     *
     * @param args optional; {@code args[0]} overrides the input file path
     * @throws Exception if the pipeline fails
     */
    public static void main(String[] args) throws Exception {
        final String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        final DataSource<String> lines = env.readTextFile(inputPath);

        // line -> individual words
        final FlatMapOperator<String, String> words = lines.flatMap(new Tokenizer());
        // word -> (word, 1)
        final MapOperator<String, Tuple2<String, Integer>> wordAndOne = words.map(new PairWithOne());

        // Group on tuple field 0 (the word) and add up tuple field 1 (the count).
        final UnsortedGrouping<Tuple2<String, Integer>> grouped = wordAndOne.groupBy(0);
        final AggregateOperator<Tuple2<String, Integer>> summed = grouped.sum(1);

        summed.print();
    }

    /**
     * Splits a line into words on runs of whitespace.
     *
     * <p>Empty tokens (from blank lines or leading/repeated whitespace) are
     * dropped so that they are not counted as a word.
     */
    private static final class Tokenizer implements FlatMapFunction<String, String> {

        /** Compiled once instead of on every record. */
        private static final java.util.regex.Pattern WHITESPACE =
                java.util.regex.Pattern.compile("\\s+");

        @Override
        public void flatMap(String line, Collector<String> out) throws Exception {
            for (String word : WHITESPACE.split(line)) {
                // A line that starts with whitespace (or is empty) yields a leading "" token.
                if (!word.isEmpty()) {
                    out.collect(word);
                }
            }
        }
    }

    /** Pairs each word with an initial count of one. */
    private static final class PairWithOne implements MapFunction<String, Tuple2<String, Integer>> {

        @Override
        public Tuple2<String, Integer> map(String value) throws Exception {
            return Tuple2.of(value, 1);
        }
    }
}
package com.hmi1024.flink.example;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
 * Streaming word count: reads lines of text from a socket, splits them into
 * words and prints a running count per word.
 *
 * <p>Usage: {@code StreamWordCount [host [port]]}. Without arguments the job
 * connects to {@value #DEFAULT_HOST}:{@value #DEFAULT_PORT}.
 */
public class StreamWordCount {

    /** Socket host used when none is supplied on the command line. */
    private static final String DEFAULT_HOST = "node1";

    /** Socket port used when none is supplied on the command line. */
    private static final int DEFAULT_PORT = 9999;

    /**
     * Builds the streaming word-count pipeline and starts it.
     *
     * @param args optional; {@code args[0]} overrides the host and
     *             {@code args[1]} overrides the port
     * @throws NumberFormatException if {@code args[1]} is not a valid integer
     * @throws Exception if the job fails
     */
    public static void main(String[] args) throws Exception {
        final String host = args.length > 0 ? args[0] : DEFAULT_HOST;
        final int port = args.length > 1 ? Integer.parseInt(args[1]) : DEFAULT_PORT;

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);

        final DataStreamSource<String> socketTextStream = env.socketTextStream(host, port);

        // line -> individual words
        final SingleOutputStreamOperator<String> words = socketTextStream.flatMap(new Tokenizer());
        // word -> (word, 1)
        final SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne =
                words.map(new PairWithOne());

        // Key on tuple field 0 (the word) and add up tuple field 1 (the count).
        final KeyedStream<Tuple2<String, Integer>, Tuple> keyedStream = wordAndOne.keyBy(0);
        final SingleOutputStreamOperator<Tuple2<String, Integer>> summed = keyedStream.sum(1);

        summed.print();
        env.execute();
    }

    /**
     * Splits a line into words on runs of whitespace.
     *
     * <p>Empty tokens (from blank lines or leading/repeated whitespace) are
     * dropped so that they are not counted as a word.
     */
    private static final class Tokenizer implements FlatMapFunction<String, String> {

        /** Compiled once instead of on every record. */
        private static final java.util.regex.Pattern WHITESPACE =
                java.util.regex.Pattern.compile("\\s+");

        @Override
        public void flatMap(String line, Collector<String> out) throws Exception {
            for (String word : WHITESPACE.split(line)) {
                // A line that starts with whitespace (or is empty) yields a leading "" token.
                if (!word.isEmpty()) {
                    out.collect(word);
                }
            }
        }
    }

    /** Pairs each word with an initial count of one. */
    private static final class PairWithOne implements MapFunction<String, Tuple2<String, Integer>> {

        @Override
        public Tuple2<String, Integer> map(String value) throws Exception {
            return Tuple2.of(value, 1);
        }
    }
}