Flink 流处理和批处理测试小代码

批处理:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

/**
 * Minimal Flink batch word-count job: tokenizes two in-memory sample lines and
 * prints how often each word occurs.
 */
public class BatchJob {

	/**
	 * Builds the word-count pipeline and prints the resulting (word, count) pairs.
	 *
	 * @param args command-line arguments (unused)
	 * @throws Exception if the Flink job fails
	 */
	public static void main(String[] args) throws Exception {
		// Set up the batch execution environment.
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSource<String> stringDataSource = env.fromElements("ssssss yi ss", "ss jiu sss");
		DataSet<Tuple2<String, Integer>> counts =
				// Split every line into (word, 1) tuples.
				stringDataSource.flatMap(new Tokenizer())
						// Group by tuple field 0 (the word) and sum tuple field 1 (the count).
						.groupBy(0)
						.sum(1);

		// In the DataSet API print() triggers the job itself and writes the result
		// to stdout, so no separate env.execute(...) call follows.
		counts.print();
	}

	/**
	 * Splits a line of text into lower-cased words and emits one (word, 1) tuple per word.
	 */
	public static class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {
		/**
		 * Tokenizes one input line.
		 *
		 * @param value the input line
		 * @param out   collector receiving one (word, 1) tuple per non-empty token
		 */
		@Override
		public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
			// Lower-case with Locale.ROOT so the result does not depend on the JVM's
			// default locale (e.g. a Turkish locale maps 'I' to a dotless 'ı', which
			// the \W+ split below would then treat as a word separator).
			String[] tokens = value.toLowerCase(java.util.Locale.ROOT).split("\\W+");

			// Emit one tuple per token; split() can yield a leading empty token when
			// the line starts with a non-word character, so skip empties.
			for (String token : tokens) {
				if (!token.isEmpty()) {
					out.collect(new Tuple2<>(token, 1));
				}
			}
		}
	}
}

流处理:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

/**
 * Minimal Flink streaming word-count job: reads text lines from a socket and
 * prints per-word counts for each 2-second processing-time window.
 */
public class StreamingJob {

	/**
	 * Builds and runs the streaming word-count pipeline.
	 *
	 * @param args command-line arguments (unused)
	 * @throws Exception if the Flink job fails
	 */
	public static void main(String[] args) throws Exception {

		// Create the execution environment.
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
		// Socket source: host "master1", port 7777, records delimited by "\n".
		DataStreamSource<String> source = env.socketTextStream("master1", 7777, "\n");
		// Transform: line -> words -> windowed count per word.
		// An anonymous class (rather than a lambda) keeps the generic output type
		// visible on the function itself.
		DataStream<WordWithCount> dataStream = source.flatMap(new FlatMapFunction<String, WordWithCount>() {
			@Override
			public void flatMap(String line, Collector<WordWithCount> collector) throws Exception {
				// Split on runs of whitespace and skip empty tokens so that blank
				// lines, leading whitespace and repeated spaces do not produce
				// empty-string "words" in the counts.
				for (String word : line.split("\\s+")) {
					if (!word.isEmpty()) {
						collector.collect(new WordWithCount(word, 1));
					}
				}
			}
		}).keyBy("word") // group by the "word" field
				// Window size 2s, slide 2s: size and slide are equal, so windows do not overlap.
				.timeWindow(Time.seconds(2), Time.seconds(2))
				.sum("count"); // sum the "count" field within each window

		// Write the results to stdout.
		dataStream.print();

		// Submit the job.
		env.execute("Flink Streaming Word Count By Java");

	}

	/**
	 * A word together with its occurrence count.
	 *
	 * NOTE(review): the public fields and public no-argument constructor are kept
	 * as in the original; the pipeline addresses the fields by name
	 * ("word", "count"), which presumably relies on Flink's POJO handling - confirm
	 * before tightening visibility.
	 */
	public static class WordWithCount {
		public String word;
		public int count;

		/** Creates an empty instance. */
		public WordWithCount() {

		}

		/**
		 * Creates an instance for the given word and count.
		 *
		 * @param word  the word
		 * @param count the number of occurrences
		 */
		public WordWithCount(String word, int count) {
			this.word = word;
			this.count = count;
		}

		@Override
		public String toString() {
			return "WordWithCount{" +
					"word='" + word + '\'' +
					", count=" + count +
					'}';
		}
	}
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值