基于File的数据源
readTextFile:使用TextInputFormat方式逐行读取文本文件,并将每一行以String形式返回
// File-based source: read a text file and print every line.
val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment

// 文件路径 is a placeholder for the path of the file to read.
val textLines = streamEnv.readTextFile(文件路径)
textLines.print()

// Nothing runs until execute() is called.
streamEnv.execute()
基于Socket的数据源
socketTextStream:从Socket中读取信息,元素可以用分隔符分开
// Socket-based source: read text from a socket on localhost:8888 and print
// every element that arrives.
val env = StreamExecutionEnvironment.getExecutionEnvironment
val inputStream = env.socketTextStream("localhost", 8888)

// Fixed typo: the original called `nputStream.print()`, an undefined name.
inputStream.print()

// Nothing runs until execute() is called.
env.execute()
print() / printToErr():将每个元素的toString()返回值打印到标准输出流或标准错误输出流中。也可以为输出添加一个前缀,用于区分不同的print调用;如果并行度大于1,输出内容前还会带上产生该输出的子任务的标识。
自定义Sink和Source(addSource addSink)
public class MySink { public static void main(String[] args) throws Exception { Configuration configuration = new Configuration(); configuration.setInteger("rest.port",9999); StreamExecutionEnvironment evn = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration); DataStreamSource<String> lines = evn.addSource(new MySource()); lines.addSink(new Mysink()); evn.execute(); } //自定义Source private static class MySource extends RichParallelSourceFunction<String> { boolean flag=true; @Override public void open(Configuration parameters) throws Exception { super.open(parameters); } @Override public void close() throws Exception { super.close(); } @Override public void run(SourceContext<String> ctx) throws Exception { int indexOfThisSubtask = getRuntimeContext().getIndexOfThisSubtask(); while (flag){ String s = UUID.randomUUID().toString(); ctx.collect(s+indexOfThisSubtask); Thread.sleep(1000); } } @Override public void cancel() { flag=false; } } //自定义Sink private static class Mysink extends RichSinkFunction<String> { int indexOfThisSubtask; @Override public void open(Configuration parameters) throws Exception { indexOfThisSubtask = getRuntimeContext().getIndexOfThisSubtask(); } @Override public void close() throws Exception { System.out.println("释放资源"); } @Override public void invoke(String value, Context context) throws Exception { System.out.println(indexOfThisSubtask+"> "+value); } } }