写法比较套路,整体思路是:
- 定义一个需要生成的数据类型
- 实现 `SourceFunction` 接口的两个方法(`run` 和 `cancel`)
- 直接使用 `env.addSource()` 传入即可
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import java.util.Random;
/**
 * Minimal Flink job that prints an endless stream of pseudo-random floats
 * produced by a custom {@link SourceFunction}.
 */
public class CreateData {

    /**
     * Builds a pipeline consisting of the custom source and a print sink,
     * then submits it for execution.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<Float> sourceStream = env.addSource(new MyDataSource());
        env.setParallelism(1);
        sourceStream.print();
        env.execute();
    }

    /**
     * Source that emits one value from {@link Random#nextFloat()} per
     * interval until {@link #cancel()} is called.
     */
    private static final class MyDataSource implements SourceFunction<Float> {

        /** Pause between two emitted elements, in milliseconds (one element per second). */
        private static final long EMIT_INTERVAL_MILLIS = 1000L;

        /**
         * Flag controlling data generation. It is cleared by {@link #cancel()},
         * which may be invoked from a different thread than the one executing
         * {@link #run(SourceContext)}; {@code volatile} guarantees the loop
         * observes the update.
         */
        private volatile boolean isRunning = true;

        /** Fixed seed, so every run produces the same sequence of values. */
        private final Random random = new Random(0);

        /**
         * Emits random floats until the source is cancelled.
         *
         * @param ctx context used to emit elements downstream
         * @throws Exception if the thread is interrupted while sleeping
         */
        @Override
        public void run(SourceContext<Float> ctx) throws Exception {
            while (isRunning) {
                ctx.collect(random.nextFloat());
                Thread.sleep(EMIT_INTERVAL_MILLIS);
            }
        }

        /** Requests the emit loop in {@link #run(SourceContext)} to stop. */
        @Override
        public void cancel() {
            isRunning = false;
        }
    }
}