02_Flink Streaming SourceFunction

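通过 env 对象的 addSource(SourceFunction) 方法可以接入自定义数据源:该方法需要传入一个 SourceFunction 对象,这个对象就是接入数据源的接口。下面的 MemSource 直接实现 SourceFunction;RSource 则继承 RichSourceFunction,可以额外重写 open()/close() 方法。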

package com.alibaba.flink.train.streaming;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

/**
 * In-memory demo source that repeatedly emits the fixed line
 * {@code "flink spark storm"} until {@link #cancel()} is called.
 */
public class MemSource implements SourceFunction<String> {

	/**
	 * Loop guard for {@link #run}. Declared volatile so that the write made
	 * by {@link #cancel()} is visible to the thread executing the emit loop.
	 */
	private volatile boolean running = true;

	/**
	 * Produces data: emits the same line over and over while the source is running.
	 *
	 * @param sourceContext context used to hand each record to the downstream operators
	 * @throws Exception declared by the {@code SourceFunction} interface; not thrown by this loop itself
	 */
	@Override
	public void run(SourceContext<String> sourceContext) throws Exception {
		while (running) {
			sourceContext.collect("flink spark storm");
		}
	}

	/**
	 * Stops the source: clears the flag so the loop in {@link #run} exits
	 * after the record currently being emitted.
	 */
	@Override
	public void cancel() {
		running = false;
	}

}

/**
 * Skeleton of a source based on {@link RichSourceFunction}. Every hook is a
 * placeholder: it either does nothing or only delegates to the superclass.
 */
class RSource extends RichSourceFunction<String> {

	/**
	 * Initialization hook; currently only delegates to the superclass.
	 *
	 * @param parameters configuration passed through to {@code super.open}
	 */
	@Override
	public void open(Configuration parameters) throws Exception {
		super.open(parameters);
	}

	/** Teardown hook; currently only delegates to the superclass. */
	@Override
	public void close() throws Exception {
		super.close();
	}

	/**
	 * Emit loop placeholder: the body is empty, so no record is collected.
	 *
	 * @param ctx context that an implementation would use to emit records
	 */
	@Override
	public void run(SourceFunction.SourceContext<String> ctx) throws Exception {
		// nothing is emitted in this skeleton
	}

	/** Cancellation placeholder: there is no running work to stop. */
	@Override
	public void cancel() {
		// nothing to cancel in this skeleton
	}

}


package com.alibaba.flink.train.streaming;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

/**
 * Streaming word count that reads its input lines from {@link MemSource}.
 *
 * <p>Pipeline: split each line on single spaces, map every word to
 * {@code (word, 1)}, key by the word, sum the counts and print the running
 * totals to stderr.
 */
public class HelloWorld {

	/**
	 * Builds the word-count pipeline and submits it for execution.
	 *
	 * @param args command-line arguments (unused)
	 * @throws Exception if building or executing the job fails
	 */
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment
				.getExecutionEnvironment();
		// Parallelism is left at the environment default; call
		// env.setParallelism(n) here to override it.

		// 1: input lines, e.g. "flink spark storm". The stream is created
		// directly from MemSource; the file-based stream that used to be
		// assigned first was overwritten immediately and never consumed.
		DataStream<String> dataStream = env.addSource(new MemSource());
		dataStream
				.flatMap(
						new FlatMapFunction<String, Tuple2<String, Integer>>() {
							@Override
							public void flatMap(String input,
									Collector<Tuple2<String, Integer>> collector)
									throws Exception {
								for (String word : input.split(" ")) {
									// Key point: field 0 holds the word,
									// field 1 holds its count of 1.
									collector
											.collect(new Tuple2<String, Integer>(
													word, 1));
								}
							}
						})// 2: (flink 1)(storm 1)
				.keyBy(0)// 3: partition by the value in field 0 (the word)
				.sum(1)// e.g. (flink:8)(storm:5): sum the values in field 1
				.printToErr();
		// Start the job. No busy-wait loop follows: an empty while (true)
		// after execute() would only spin a CPU core without doing any work.
		env.execute();
	}

}



需要重点关注 SourceFunction 接口,其源码(节选)如下:

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
首先,我们需要导入以下依赖:

```xml
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-core</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
</dependency>
```

然后,我们可以定义一个自定义的Source Function,使用DelayQueue产生测试数据。下面是一个简单的示例代码:

```java
import org.apache.flink.streaming.api.functions.source.SourceFunction;

import java.util.concurrent.DelayQueue;
import java.util.concurrent.Delayed;
import java.util.concurrent.TimeUnit;

public class DelayedEventSource implements SourceFunction<DelayedEvent> {
    private volatile boolean running = true;
    private final DelayQueue<DelayedEvent> queue = new DelayQueue<>();

    public void run(SourceContext<DelayedEvent> ctx) throws Exception {
        while (running) {
            DelayedEvent event = queue.take();
            ctx.collect(event);
        }
    }

    public void cancel() {
        running = false;
    }

    public void addEvent(long timestamp, String message) {
        queue.put(new DelayedEvent(timestamp, message));
    }

    public static class DelayedEvent implements Delayed {
        private final long timestamp;
        private final String message;

        public DelayedEvent(long timestamp, String message) {
            this.timestamp = timestamp;
            this.message = message;
        }

        public long getDelay(TimeUnit unit) {
            long diff = timestamp - System.currentTimeMillis();
            return unit.convert(diff, TimeUnit.MILLISECONDS);
        }

        public int compareTo(Delayed other) {
            if (this.timestamp < ((DelayedEvent) other).timestamp) {
                return -1;
            }
            if (this.timestamp > ((DelayedEvent) other).timestamp) {
                return 1;
            }
            return 0;
        }

        public String getMessage() {
            return message;
        }
    }
}
```

在这个示例中,我们定义了一个DelayQueue,用来存储延迟事件。run()方法在一个while循环中循环获取DelayQueue中的事件,并使用ctx.collect()方法将事件发送给下游算子。addEvent()方法用于向DelayQueue中添加新的延迟事件。

最后,我们可以通过以下代码来使用自定义的Source Function:

```java
DelayedEventSource source = new DelayedEventSource();
// 添加测试数据
source.addEvent(System.currentTimeMillis() + 1000, "Hello");
source.addEvent(System.currentTimeMillis() + 2000, "World");
DataStream<DelayedEventSource.DelayedEvent> stream = env.addSource(source);
```

在这个示例中,我们创建了一个DelayedEventSource,并向其中添加了两个延迟事件。然后,我们将其作为Source Function添加到Flink的DataStream中。

注意:SourceFunction 对象会被序列化后分发到任务节点执行,而 java.util.concurrent.DelayQueue 并没有实现 Serializable,因此上面在提交作业之前通过 addEvent() 放入队列的数据无法随函数对象一起传递。实际使用时,建议在 run()(或 RichSourceFunction 的 open())内部创建队列并生成测试数据。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值