Data Sources
- source是程序的数据输入,你可以通过StreamExecutionEnvironment.addSource(SourceFunction) 来为你的程序添加一个source
- flink提供了大量的已经实现好的source方法,你也可以自己自定义source
(1). 通过实现SourceFunction接口来自定义无并行度的source
package com.im.flink.task.Streaming;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
/**
 * Custom source with a parallelism of 1.
 *
 * <p>Emits an increasing sequence of {@code Long} values starting at 1,
 * pausing one second between elements, until {@link #cancel()} is called.
 *
 * @author lm
 * @date 2019/11/6
 */
public class MyNoParallelSource implements SourceFunction<Long> {

    /**
     * Loop flag for {@link #run}. Declared {@code volatile} because Flink
     * invokes {@link #cancel()} from a different thread than the one
     * executing {@link #run}; without it the write may never become visible
     * to the emitting loop.
     */
    private volatile boolean isRunning = true;

    /** Next value to emit; a primitive to avoid boxing on every increment. */
    private long count = 1L;

    /**
     * Main method of the source: produces the data.
     *
     * <p>Emits the current counter value, increments it, then sleeps for
     * one second, repeating for as long as the source has not been cancelled.
     *
     * @param ctx context used to emit elements
     * @throws Exception if the thread is interrupted while sleeping
     */
    @Override
    public void run(SourceContext<Long> ctx) throws Exception {
        while (isRunning) {
            ctx.collect(count);
            count++;
            Thread.sleep(1000);
        }
    }

    /**
     * Called when the source is cancelled; clears the flag so that the loop
     * in {@link #run} exits on its next check.
     */
    @Override
    public void cancel() {
        isRunning = false;
    }
}
package com.im.flink.task.Streaming;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
/**
 * Demo job that consumes a source with a parallelism of 1.
 *
 * <p>Reads from {@link MyNoParallelSource}, traces every element to stdout
 * through a map operator, and prints the result of {@code sum(0)} over
 * 2-second all-windows.
 *
 * @author lm
 * @date 2019/11/6
 */
public class StreamingDemoNoPralalleSource {

    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment environment =
                StreamExecutionEnvironment.getExecutionEnvironment();

        final DataStreamSource<Long> numbers = environment.addSource(new MyNoParallelSource());

        // Tracing operator: prints each element and passes it through unchanged.
        final MapFunction<Long, Object> tracer = new MapFunction<Long, Object>() {
            @Override
            public Object map(Long value) throws Exception {
                System.out.println("StreamingDemoNoPralalleSource.map:" + value);
                return value;
            }
        };
        // The mapped stream is not consumed further; the window below reads
        // directly from the source stream.
        numbers.map(tracer);

        final SingleOutputStreamOperator<Long> windowTotal =
                numbers.timeWindowAll(Time.seconds(2)).sum(0);
        windowTotal.print().setParallelism(1);

        environment.execute("StreamingDemoNoPralalleSource");
    }
}
(2). 或者通过实现ParallelSourceFunction接口或继承RichParallelSourceFunction来自定义有并行度的source
package com.im.flink.task.Streaming;
import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction;
/**
 * Custom source that supports parallelism.
 *
 * <p>Emits an increasing sequence of {@code Long} values starting at 1,
 * pausing one second between elements, until {@link #cancel()} is called.
 * Implementing {@code ParallelSourceFunction} allows the source to be run
 * with a parallelism greater than 1.
 *
 * @author lm
 * @date 2019/11/6
 */
public class MyPralalleSource implements ParallelSourceFunction<Long> {

    /**
     * Loop flag for {@link #run}. Declared {@code volatile} because Flink
     * invokes {@link #cancel()} from a different thread than the one
     * executing {@link #run}; without it the write may never become visible
     * to the emitting loop.
     */
    private volatile boolean isRunning = true;

    /** Next value to emit; a primitive to avoid boxing on every increment. */
    private long count = 1L;

    /**
     * Main method of the source: produces the data.
     *
     * <p>Emits the current counter value, increments it, then sleeps for
     * one second, repeating for as long as the source has not been cancelled.
     *
     * @param ctx context used to emit elements
     * @throws Exception if the thread is interrupted while sleeping
     */
    @Override
    public void run(SourceContext<Long> ctx) throws Exception {
        while (isRunning) {
            ctx.collect(count);
            count++;
            Thread.sleep(1000);
        }
    }

    /**
     * Called when the source is cancelled; clears the flag so that the loop
     * in {@link #run} exits on its next check.
     */
    @Override
    public void cancel() {
        isRunning = false;
    }
}
package com.im.flink.task.Streaming;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
/**
 * Demo job that consumes a parallel-capable source.
 *
 * <p>Reads from {@link MyPralalleSource}, traces every element to stdout
 * through a map operator, and prints the result of {@code sum(0)} over
 * 2-second all-windows.
 *
 * @author lm
 * @date 2019/11/6
 */
public class StreamingDemoPralalleSource {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Parallel-capable source; its parallelism is pinned to 1 here.
        // (Original author's note: when no parallelism is set explicitly,
        // it is chosen automatically based on the system's CPUs.)
        DataStreamSource<Long> text = env.addSource(new MyPralalleSource()).setParallelism(1);

        // Tracing operator: prints each element and passes it through unchanged.
        // Its output stream is not consumed further; the window below reads
        // directly from the source stream.
        text.map(new MapFunction<Long, Object>() {
            @Override
            public Object map(Long aLong) throws Exception {
                // Prefix names this class (was copy-pasted from the non-parallel demo).
                System.out.println("StreamingDemoPralalleSource.map:" + aLong);
                return aLong;
            }
        });

        SingleOutputStreamOperator<Long> sum = text.timeWindowAll(Time.seconds(2)).sum(0);
        sum.print().setParallelism(1);

        // Job name matches this class (was copy-pasted from the non-parallel demo).
        env.execute("StreamingDemoPralalleSource");
    }
}
package com.im.flink.task.Streaming;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
/**
* describe:
*
* RichParallelSourceFunction 多了open close 方法
*
* 针对source 中如果需要获取其他资源链接,在close 中关闭链接
* @author