package wordcount;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
public class StreamWordCount {
public static void main(String[] args) throws Exception{
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
ParameterTool tool = ParameterTool.fromArgs(args);
String host = tool.get("host");
Integer port = tool.getInt("port");
DataStreamSource<String> source = env.socketTextStream(host, port);
SingleOutputStreamOperator<Tuple2<String, Integer>> sum = source.flatMap(new MyFlatMapFunction())
.keyBy(0)
.sum(1);
sum.print();
env.execute("word count");
}
public static class MyFlatMapFunction implements FlatMapFunction<String,Tuple2<String,Integer>>{
public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
String[] word = value.split(" ");
for (String s:word
) {
out.collect(new Tuple2<String, Integer>(s,1));
}
}
}
}
package wordcount;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple2;
/**
 * Batch word count job: reads a text file, tokenizes each line with
 * {@link StreamWordCount.MyFlatMapFunction}, groups by word, and prints the
 * summed counts.
 *
 * <p>Usage: {@code SetWordCount [inputPath]}. When no path is given, the
 * built-in default path is used.
 */
public class SetWordCount {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "D:\\git\\csdn-flink\\csdn-flink-1\\src\\main\\resources\\word";

    /**
     * Builds and runs the batch pipeline.
     *
     * @param args optional; {@code args[0]} is the path of the text file to count
     * @throws Exception if reading the input or running the job fails
     */
    public static void main(String[] args) throws Exception {
        // Allow the input location to be overridden so the job is not tied to
        // one machine's directory layout; fall back to the original path.
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;

        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        DataSource<String> lines = env.readTextFile(inputPath);
        AggregateOperator<Tuple2<String, Integer>> sum = lines
                .flatMap(new StreamWordCount.MyFlatMapFunction())
                .groupBy(0)   // group on the word (tuple field 0)
                .sum(1);      // add up the counts (tuple field 1)

        // No env.execute() follows: DataSet#print() is documented to trigger
        // execution of the program itself.
        sum.print();
    }
}