自定义数据源的测试类:演示并行度不可修改(SourceFunction)与并行度可修改(ParallelSourceFunction)两种方式。
目录
测试类
package com.atguigu.chapter05;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction;
import java.util.Random;
/**
 * Demo driver for custom Flink sources: contrasts a non-parallel source
 * (fixed parallelism) with a parallel source whose parallelism can be set.
 *
 * @author psl
 * @create 2022/5/2 21:32
 */
public class ClickSourceTest {

    /**
     * Builds a small streaming job that prints the output of a custom parallel source.
     *
     * @param args unused command-line arguments
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Default parallelism for every operator that does not override it (here: print).
        env.setParallelism(4);

        // Alternative: env.addSource(new ClickSource()) yields a DataStreamSource<Event>.
        // A source that implements only SourceFunction is non-parallel; its parallelism
        // cannot be changed and stays at 1.

        // A source that implements ParallelSourceFunction may have its parallelism changed.
        DataStreamSource<Integer> customStream =
                env.addSource(new ParallelCustomSource()).setParallelism(2);
        customStream.print();

        env.execute();
    }

    /**
     * Parallel source that emits random integers in a tight loop until cancelled.
     */
    private static class ParallelCustomSource implements ParallelSourceFunction<Integer> {

        // volatile: cancel() is invoked from a different thread than run(), so the
        // write must be visible to the emitting loop.
        private volatile boolean running = true;

        private final Random random = new Random();

        /**
         * Emits one random int per iteration for as long as the source is running.
         *
         * @param ctx context used to emit records downstream
         * @throws Exception declared by the interface; not thrown by this loop itself
         */
        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
            while (running) {
                ctx.collect(random.nextInt());
            }
        }

        /** Requests that {@link #run} leave its loop. */
        @Override
        public void cancel() {
            running = false;
        }
    }
}
自定义生成数据实现类
package com.atguigu.chapter05;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import java.util.Calendar;
import java.util.Random;
/**
 * Custom non-parallel source that emits one randomly generated click {@code Event}
 * per second until cancelled.
 *
 * @author psl
 * @create 2022/5/2 21:19
 */
public class ClickSource implements SourceFunction<Event> {

    /** Candidate user names; one is picked at random for each event. */
    private static final String[] USERS = {"Tom", "Jack", "Mary", "Bob"};

    /** Candidate URLs; one is picked at random for each event. */
    private static final String[] URLS =
            {"./home", "./cart", "./prod", "./prod?id=100", "./prod?id=20"};

    /** Pause between two emitted events, in milliseconds. */
    private static final long EMIT_INTERVAL_MILLIS = 1000L;

    // volatile: cancel() is invoked from a different thread than run(), so the
    // write must be visible to the emitting loop.
    private volatile boolean running = true;

    /**
     * Emits a random (user, url, current wall-clock millis) event, then sleeps for
     * {@link #EMIT_INTERVAL_MILLIS}, repeating while the source is running.
     *
     * @param ctx context used to emit records downstream
     * @throws Exception if the thread is interrupted while sleeping
     */
    @Override
    public void run(SourceContext<Event> ctx) throws Exception {
        Random random = new Random();
        while (running) {
            String user = USERS[random.nextInt(USERS.length)];
            String url = URLS[random.nextInt(URLS.length)];
            // Same epoch-millis value as Calendar.getInstance().getTimeInMillis(),
            // without allocating a Calendar per event.
            Long timestamp = System.currentTimeMillis();
            ctx.collect(new Event(user, url, timestamp));
            Thread.sleep(EMIT_INTERVAL_MILLIS);
        }
    }

    /** Requests that {@link #run} leave its loop after the current iteration. */
    @Override
    public void cancel() {
        running = false;
    }
}
运行结果
环境并行度设为 4:print 算子以 4 个并行子任务运行,自定义数据源通过 setParallelism(2) 以 2 个并行子任务运行。