flink的union操作
在进行union操作之前,必须保证各个数据流的元素类型一致;union会把两个(或多个)数据流合并成一个数据流。
准备数据源
/**
 * Demo source that emits an increasing integer sequence, starting at 0,
 * with a one-second pause after each emitted value.
 *
 * <p>The loop in {@link #run} keeps going until {@link #cancel()} clears the
 * running flag.
 */
public class MyNoParalleSource implements SourceFunction<Integer> {
    /**
     * Loop flag for {@link #run}. Declared {@code volatile} because
     * {@link #cancel()} is normally invoked from a different thread than the
     * one executing {@link #run}; without it the loop might never observe
     * the update and the source would not stop.
     */
    private volatile boolean isRunning = true;

    /** Next value to emit; a primitive avoids re-boxing on every increment. */
    private int counter = 0;

    /**
     * Emits the current counter value, increments it, then sleeps for one
     * second, repeating until the source is cancelled.
     *
     * @param sourceContext context used to emit records downstream
     * @throws Exception if the sleep is interrupted or emitting a record fails
     */
    public void run(SourceContext<Integer> sourceContext) throws Exception {
        while (isRunning) {
            // Emit the current value downstream.
            sourceContext.collect(counter);
            counter++;
            // Produce one record per second.
            Thread.sleep(1000);
        }
    }

    /** Requests that the emit loop in {@link #run} stop after its current iteration. */
    public void cancel() {
        isRunning = false;
    }
}
做合并操作
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Demonstrates {@code union}: two integer streams are merged into one and
 * every merged element is logged to standard output.
 */
public class FlinkUnionDemo {

    /**
     * Builds the job (two sources, a union, a logging map) and executes it.
     *
     * @param args unused command-line arguments
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        // Obtain the execution environment.
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<Integer> first = environment.addSource(new MyNoParalleSource());
        DataStreamSource<Integer> second = environment.addSource(new MyNoParalleSource());

        // Merge both streams; they share the same element type.
        DataStream<Integer> merged = first.union(second);
        merged.map(new LoggingMapper());

        environment.execute();
    }

    /** Prints each element and passes it through unchanged. */
    private static final class LoggingMapper implements MapFunction<Integer, Integer> {
        public Integer map(Integer value) throws Exception {
            System.out.println("处理后的结果:" + value);
            return value;
        }
    }
}
union合并后的数据按照各数据流中元素到达的先后顺序混合在一起,并不保证严格地一边一条交替输出;本例中两个数据源都是每秒产生一条数据,所以输出看起来像是一边一条。
flink的connect操作
connect连接操作可以对不同的数据源进行拼接形成一个数据源
新的数据源有两个类型:一个是第一个数据源的类型,另一个是第二个数据源的类型
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.ConnectedStreams;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoMapFunction;
/**
 * Demonstrates {@code connect}: an {@code Integer} stream and a {@code String}
 * stream are connected, and each side is mapped by its own method of a
 * {@code CoMapFunction} before the combined result is printed.
 */
public class FlinkConectDemo {

    /**
     * Builds the connected-streams job and executes it.
     *
     * @param args unused command-line arguments
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        // Create the Flink execution environment.
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

        // First input: the raw integers.
        DataStream<Integer> numbers = environment.addSource(new MyNoParalleSource());

        // Second input: integers converted to prefixed strings, so the two sides differ in type.
        DataStream<String> labels = environment
                .addSource(new MyNoParalleSource())
                .map(new StarPrefixer());

        ConnectedStreams<Integer, String> connected = numbers.connect(labels);
        connected.map(new SourceTagger()).print().setParallelism(1);

        // Run the job.
        environment.execute();
    }

    /** Converts an integer into a string prefixed with five asterisks. */
    private static final class StarPrefixer implements MapFunction<Integer, String> {
        public String map(Integer value) throws Exception {
            return "*****" + value;
        }
    }

    /** Labels every element with the input it arrived on. */
    private static final class SourceTagger implements CoMapFunction<Integer, String, Object> {
        // Handles elements of the first (Integer) input.
        public Object map1(Integer value) throws Exception {
            return "对一个Source处理:" + value;
        }

        // Handles elements of the second (String) input.
        public Object map2(String value) throws Exception {
            return "对二个Source处理:" + value;
        }
    }
}
如果需要对连接后的数据源进行map操作,需要使用CoMapFunction
flink的filter操作
filter操作很好理解:根据一个条件进行判断,如果结果是true就保留该条数据,如果是false就丢弃该条数据
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Demonstrates {@code filter}: only the even numbers emitted by
 * {@code MyNoParalleSource} are kept and printed.
 */
public class FlinkFilterDemo {

    /**
     * Builds the filtering job and executes it.
     *
     * @param args unused command-line arguments
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        // Obtain the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Add the data source.
        DataStream<Integer> source = env.addSource(new MyNoParalleSource());
        source.filter(new FilterFunction<Integer>() {
            // A record is kept when this returns true and dropped otherwise.
            public boolean filter(Integer integer) throws Exception {
                return integer % 2 == 0;
            }
        }).print().setParallelism(1);
        env.execute();
    }
}
flink的split、select操作
split和select需要结合起来使用:split用来给数据打标记,然后再用select选出带有指定标记的数据。
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.util.ArrayList;
/**
 * Demonstrates {@code split}: each integer is tagged "even" or "odd" by an
 * {@code OutputSelector}, and the split stream is printed.
 */
public class FlinkSplitDemo {

    /**
     * Builds the split job and executes it.
     *
     * @param args unused command-line arguments
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        // Create the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Add the data source.
        DataStream<Integer> source = env.addSource(new MyNoParalleSource());
        source.split(new OutputSelector<Integer>() {
            /**
             * Returns the tag for a single record.
             *
             * A fresh list is built on every call. Keeping the list as a field
             * would accumulate tags across records, so every record would end
             * up carrying both "even" and "odd" and the list would grow
             * without bound.
             */
            public Iterable<String> select(Integer value) {
                ArrayList<String> tags = new ArrayList<String>(1);
                if (value % 2 == 0) {
                    tags.add("even");
                } else {
                    tags.add("odd");
                }
                return tags;
            }
        // NOTE(review): no select(...) is applied here, so every record is printed
        // regardless of its tag; call select("even") or select("odd") on the split
        // stream to print only one group.
        }).print().setParallelism(1);
        // Run the job.
        env.execute();
    }
}
flink的partition分区操作
分区规则
import org.apache.flink.api.common.functions.Partitioner;
/**
 * Partitioner that routes even keys to partition 0 and odd keys to
 * partition 1, folded into the number of partitions actually available.
 */
public class MyPartitioner implements Partitioner<Integer> {

    /**
     * Chooses the target partition for a key.
     *
     * @param value         the key to partition; must not be null
     * @param numPartitions the number of partitions available downstream
     * @return 0 for even keys; 1 for odd keys when at least two partitions
     *         exist, otherwise 0
     */
    public int partition(Integer value, int numPartitions) {
        int preferred = (value % 2 == 0) ? 0 : 1;
        // Reduce modulo the partition count so that an odd key cannot be sent
        // to index 1 when only a single partition exists.
        return preferred % numPartitions;
    }
}
分区业务
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Demonstrates {@code partitionCustom}: integers are wrapped in
 * {@code Tuple1}, partitioned on tuple field 0 with {@code MyPartitioner},
 * then unwrapped, logged with the id of the handling thread, and printed.
 */
public class FlinkPartitionDemo {

    /**
     * Builds the custom-partitioning job and executes it.
     *
     * @param args unused command-line arguments
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        // Obtain the execution environment.
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

        // Add the data source.
        DataStreamSource<Integer> numbers = environment.addSource(new MyNoParalleSource());

        // Wrap each value in a Tuple1 so the partitioning key can be addressed by field index.
        DataStream<Tuple1<Integer>> wrapped = numbers.map(new TupleWrapper());

        // Partition on tuple field 0 using the custom partitioner.
        DataStream<Tuple1<Integer>> partitioned = wrapped.partitionCustom(new MyPartitioner(), 0);

        DataStream<Integer> unwrapped = partitioned.map(new LoggingUnwrapper());
        unwrapped.print().setParallelism(1);

        environment.execute();
    }

    /** Wraps an integer into a single-field tuple. */
    private static final class TupleWrapper implements MapFunction<Integer, Tuple1<Integer>> {
        public Tuple1<Integer> map(Integer value) throws Exception {
            return new Tuple1<Integer>(value);
        }
    }

    /** Extracts the tuple's only field and logs it with the current thread id. */
    private static final class LoggingUnwrapper implements MapFunction<Tuple1<Integer>, Integer> {
        public Integer map(Tuple1<Integer> value) throws Exception {
            Integer payload = value.f0;
            long threadId = Thread.currentThread().getId();
            System.out.println("线程号:" + threadId + "\t 数据:" + payload);
            return payload;
        }
    }
}
//需要注意的是:使用按字段下标分区的partitionCustom(partitioner, field)时,数据源必须是tuple形式的,因为分区的时候需要根据tuple的某个字段进行分区,比如:
DataStream<Tuple1<Integer>> partitionCustom = source1.partitionCustom(new MyPartitioner(), 0);
这句代码就是根据Tuple1的第一个字段(下标为0)进行分区的