0、keyBy
参考:(原文此处的链接缺失,keyBy 的用法请参阅 Flink 官方文档中关于 keyBy 的说明)
1、shuffle源码及案例(ShufflePartitioner,随机)
源码
//源码
/**
 * Repartitions this stream using a {@link ShufflePartitioner}.
 *
 * @return the stream produced by {@code setConnectionType} for the shuffle partitioner
 */
public DataStream<T> shuffle() {
    // The actual channel choice is delegated to ShufflePartitioner.
    ShufflePartitioner<T> shufflePartitioner = new ShufflePartitioner<T>();
    return setConnectionType(shufflePartitioner);
}
/**
 * Partitioner that picks the output channel of every record at random.
 *
 * @param <T> type of the elements carried by the stream records
 */
public class ShufflePartitioner<T> extends StreamPartitioner<T> {

    private Random random = new Random();

    /**
     * Chooses the output channel for a record.
     *
     * @param record the record being routed; its content is not inspected
     * @return a pseudo-random channel index in {@code [0, numberOfChannels)}
     */
    @Override
    public int selectChannel(SerializationDelegate<StreamRecord<T>> record) {
        // The upper bound is exclusive, so the result is always a valid channel index.
        final int channelCount = numberOfChannels;
        return random.nextInt(channelCount);
    }
}
案例
/**
 * Demo job for {@code shuffle()}: every line read from a socket is tagged with the
 * subtask index of the map operator, shuffled, and printed together with the subtask
 * index of the sink that received it.
 */
public class _06_Random {

    public static void main(String[] args) throws Exception {
        // Local execution environment with the web UI enabled.
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());

        // Source: text lines from a socket.
        DataStreamSource<String> lines = env.socketTextStream("192.168.42.101", 8888);

        // The map runs with parallelism 1, so the first index in the output is always 0.
        SingleOutputStreamOperator<String> tagged =
                lines.map(new SubtaskTagger()).setParallelism(1);

        DataStream<String> shuffled = tagged.shuffle();
        shuffled.addSink(new SubtaskPrinter());

        env.execute("_06_Random");
    }

    /** Appends the index of the map subtask that processed the value. */
    private static class SubtaskTagger extends RichMapFunction<String, String> {
        @Override
        public String map(String value) throws Exception {
            int subtask = getRuntimeContext().getIndexOfThisSubtask();
            return value + " -> " + subtask;
        }
    }

    /** Prints each value followed by the index of the sink subtask that received it. */
    private static class SubtaskPrinter extends RichSinkFunction<String> {
        @Override
        public void invoke(String value, Context context) throws Exception {
            int subtask = getRuntimeContext().getIndexOfThisSubtask();
            System.out.println(value + " -> " + subtask);
        }
    }
}
结果:
a -> 0 -> 1
a -> 0 -> 9
a -> 0 -> 3
a -> 0 -> 1
a -> 0 -> 1
a -> 0 -> 6
a -> 0 -> 3
a -> 0 -> 7
a -> 0 -> 9
...
2、Rebalance源码及案例(轮询,RebalancePartitioner)
源码:
/**
 * Repartitions this stream using a {@link RebalancePartitioner}.
 *
 * @return the stream produced by {@code setConnectionType} for the rebalance partitioner
 */
public DataStream<T> rebalance() {
    // The actual channel choice is delegated to RebalancePartitioner.
    RebalancePartitioner<T> roundRobinPartitioner = new RebalancePartitioner<T>();
    return setConnectionType(roundRobinPartitioner);
}
/**
 * Partitioner that cycles through the output channels in round-robin order,
 * starting from a randomly chosen position.
 *
 * @param <T> type of the elements carried by the stream records
 */
public class RebalancePartitioner<T> extends StreamPartitioner<T> {

    private int nextChannelToSendTo;

    /**
     * Initializes the partitioner and picks a random starting position.
     *
     * @param numberOfChannels number of output channels to cycle through
     */
    @Override
    public void setup(int numberOfChannels) {
        super.setup(numberOfChannels);
        // Random start position within [0, numberOfChannels).
        int startPosition = ThreadLocalRandom.current().nextInt(numberOfChannels);
        nextChannelToSendTo = startPosition;
    }

    /**
     * Advances the round-robin position by one and returns it.
     *
     * @param record the record being routed; its content is not inspected
     * @return the channel index following the previously used one, wrapping around at
     *     {@code numberOfChannels}
     */
    @Override
    public int selectChannel(SerializationDelegate<StreamRecord<T>> record) {
        // Step first, then return: the first record goes to (start + 1) % numberOfChannels.
        final int advanced = (nextChannelToSendTo + 1) % numberOfChannels;
        nextChannelToSendTo = advanced;
        return advanced;
    }
}
案例:
/**
 * Demo job for {@code rebalance()}: every line read from a socket is tagged with the
 * subtask index of the map operator, rebalanced, and printed together with the subtask
 * index of the sink that received it.
 */
public class _07_Rebalance {

    public static void main(String[] args) throws Exception {
        // Local execution environment with the web UI enabled.
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());

        // Source: text lines from a socket.
        DataStreamSource<String> lines = env.socketTextStream("192.168.42.101", 8888);

        // The map runs with parallelism 1, so the first index in the output is always 0.
        SingleOutputStreamOperator<String> tagged =
                lines.map(new SubtaskTagger()).setParallelism(1);

        DataStream<String> rebalanced = tagged.rebalance();
        rebalanced.addSink(new SubtaskPrinter());

        env.execute("_07_Rebalance");
    }

    /** Appends the index of the map subtask that processed the value. */
    private static class SubtaskTagger extends RichMapFunction<String, String> {
        @Override
        public String map(String value) throws Exception {
            int subtask = getRuntimeContext().getIndexOfThisSubtask();
            return value + " -> " + subtask;
        }
    }

    /** Prints each value followed by the index of the sink subtask that received it. */
    private static class SubtaskPrinter extends RichSinkFunction<String> {
        @Override
        public void invoke(String value, Context context) throws Exception {
            int subtask = getRuntimeContext().getIndexOfThisSubtask();
            System.out.println(value + " -> " + subtask);
        }
    }
}
结果:
a -> 0 -> 0
a -> 0 -> 1
a -> 0 -> 2
a -> 0 -> 3
a -> 0 -> 4
a -> 0 -> 5
a -> 0 -> 6
a -> 0 -> 7
a -> 0 -> 8
3、Rescaling类似于Rebalance,不同点是:每个上游子任务只在下游子任务的一个子集内轮询(数据尽量在同一个TaskManager内本地传输,而不是跨所有下游子任务轮询),对应的分区器是RescalePartitioner
4、broadcast案例(将同一个数据拷贝到所有的channel对应的buffer)
案例
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
/**
 * Demo job for {@code broadcast()}: every line read from a socket is tagged with the
 * subtask index of the map operator, broadcast, and printed by the sink together with
 * the index of the sink subtask that received it.
 */
public class _08_Broadcast {

    public static void main(String[] args) throws Exception {
        // Local execution environment with the web UI enabled.
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());

        // Source: text lines from a socket.
        DataStreamSource<String> source = env.socketTextStream("192.168.42.101", 8888);

        // The map runs with parallelism 1, so the first index in the output is always 0.
        SingleOutputStreamOperator<String> operator = source.map(new RichMapFunction<String, String>() {
            @Override
            public String map(String value) throws Exception {
                int index = getRuntimeContext().getIndexOfThisSubtask();
                return value + " -> " + index;
            }
        }).setParallelism(1);

        DataStream<String> broadcasted = operator.broadcast();
        broadcasted.addSink(new RichSinkFunction<String>() {
            @Override
            public void invoke(String value, Context context) throws Exception {
                int index = getRuntimeContext().getIndexOfThisSubtask();
                System.out.println(value + " -> " + index);
            }
        });

        // Job name matches this class (was the copy-pasted "_07_Rebalance").
        env.execute("_08_Broadcast");
    }
}
结果:
a -> 0 -> 2
a -> 0 -> 5
a -> 0 -> 1
a -> 0 -> 8
a -> 0 -> 4
a -> 0 -> 3
a -> 0 -> 7
a -> 0 -> 11
a -> 0 -> 0
a -> 0 -> 9
a -> 0 -> 10
a -> 0 -> 6
5、自定义分区器partitionCustom
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.Partitioner;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
/**
 * Demo job for {@code partitionCustom()}: every line read from a socket becomes a
 * {@code (line, 1)} tuple, is routed by a custom partitioner keyed on the line text,
 * and is printed together with the index of the sink subtask that received it.
 */
public class _09_PartitionCustom {

    public static void main(String[] args) throws Exception {
        // Local execution environment with the web UI enabled.
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());

        // Source: text lines from a socket.
        DataStreamSource<String> source = env.socketTextStream("192.168.42.101", 8888);

        SingleOutputStreamOperator<Tuple2<String, Integer>> map = source.map(new MapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> map(String value) throws Exception {
                return Tuple2.of(value, 1);
            }
        }).setParallelism(2);

        /*
         * First argument:  Partitioner - maps a key to a target partition index.
         * Second argument: KeySelector - extracts the partitioning key from each element.
         */
        DataStream<Tuple2<String, Integer>> stream = map.partitionCustom(new Partitioner<String>() {
            /**
             * Keys starting with "j" go to partition 8, clamped to the last partition
             * when fewer than 9 partitions exist; every other key goes to partition 0.
             */
            @Override
            public int partition(String key, int numPartitions) {
                int index = 0;
                if (key.startsWith("j")) {
                    // A fixed 8 would be out of range whenever numPartitions <= 8.
                    index = Math.min(8, numPartitions - 1);
                }
                return index;
            }
        }, new KeySelector<Tuple2<String, Integer>, String>() {
            @Override
            public String getKey(Tuple2<String, Integer> value) throws Exception {
                return value.f0;
            }
        });

        stream.addSink(new RichSinkFunction<Tuple2<String, Integer>>() {
            @Override
            public void invoke(Tuple2<String, Integer> value, Context context) throws Exception {
                int index = getRuntimeContext().getIndexOfThisSubtask();
                System.out.println(value + " -> " + index);
            }
        });

        env.execute("_09_PartitionCustom");
    }
}
结果:
(java,1) -> 8
(jva,1) -> 8
(hello,1) -> 0