flink物理分区算子源码分析(shuffle,rebalance,broadcast)

0、keyBy

参考:添加链接描述

1、shuffle源码及案例(ShufflePartitioner,随机)

源码

//源码
/**
 * Re-partitions this stream using a {@code ShufflePartitioner}.
 *
 * @return the stream returned by {@code setConnectionType} for the new partitioner
 */
public DataStream<T> shuffle() {
        // Partition through a ShufflePartitioner.
        return setConnectionType(new ShufflePartitioner<T>());
    }

/**
 * Stream partitioner that selects the output channel for each record at random
 * (abridged excerpt as quoted in this article).
 */
public class ShufflePartitioner<T> extends StreamPartitioner<T> {

    private Random random = new Random();
     // Pick the output channel index at random; the record itself is not inspected.
     // numberOfChannels is not declared in this excerpt - presumably inherited from StreamPartitioner.
    @Override
    public int selectChannel(SerializationDelegate<StreamRecord<T>> record) {
        return random.nextInt(numberOfChannels);
    }
}

案例

/**
 * Example job for {@code shuffle()}: reads text lines from a socket, tags each line with the
 * index of the map subtask that handled it, shuffles the stream, and prints every record
 * together with the index of the sink subtask that received it.
 */
public class _06_Random {

    public static void main(String[] args) throws Exception {
        // Local execution environment with the web UI enabled.
        Configuration configuration = new Configuration();
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);

        // Socket source feeding the pipeline.
        DataStreamSource<String> lines = env.socketTextStream("192.168.42.101", 8888);

        // The tagging map runs with parallelism 1.
        SingleOutputStreamOperator<String> tagged =
                lines.map(new SubtaskTagger()).setParallelism(1);

        DataStream<String> shuffled = tagged.shuffle();
        shuffled.addSink(new SubtaskPrinter());

        env.execute("_06_Random");
    }

    /** Appends " -> " and the index of the current map subtask to every line. */
    private static final class SubtaskTagger extends RichMapFunction<String, String> {
        @Override
        public String map(String value) throws Exception {
            int subtask = getRuntimeContext().getIndexOfThisSubtask();
            return value + " -> " + subtask;
        }
    }

    /** Prints every record followed by " -> " and the index of the current sink subtask. */
    private static final class SubtaskPrinter extends RichSinkFunction<String> {
        @Override
        public void invoke(String value, Context context) throws Exception {
            int subtask = getRuntimeContext().getIndexOfThisSubtask();
            System.out.println(value + " -> " + subtask);
        }
    }
}


结果:
a -> 0 -> 1
a -> 0 -> 9
a -> 0 -> 3
a -> 0 -> 1
a -> 0 -> 1
a -> 0 -> 6
a -> 0 -> 3
a -> 0 -> 7
a -> 0 -> 9
...

2、Rebalance源码及案例(轮询,RebalancePartitioner)

在这里插入图片描述

源码:

/**
 * Re-partitions this stream using a {@code RebalancePartitioner}.
 *
 * @return the stream returned by {@code setConnectionType} for the new partitioner
 */
public DataStream<T> rebalance() {
        // Partition through a RebalancePartitioner.
        return setConnectionType(new RebalancePartitioner<T>());
    }

/**
 * Stream partitioner that cycles through the output channels in round-robin order
 * (abridged excerpt as quoted in this article).
 */
public class RebalancePartitioner<T> extends StreamPartitioner<T> {
    // Channel index returned by the most recent selectChannel call (or the random seed value after setup).
    private int nextChannelToSendTo;

    /**
     * Delegates to the parent setup and then seeds the round-robin position with a random
     * channel index in [0, numberOfChannels).
     *
     * @param numberOfChannels number of output channels
     */
    @Override
    public void setup(int numberOfChannels) {
        super.setup(numberOfChannels);
        nextChannelToSendTo = ThreadLocalRandom.current().nextInt(numberOfChannels);
    }

    /**
     * Advances the round-robin position by one (wrapping at numberOfChannels) and returns it.
     *
     * @param record the record being routed; not inspected by this implementation
     * @return the next channel index
     */
    @Override
    public int selectChannel(SerializationDelegate<StreamRecord<T>> record) {
        // Round-robin: increment first, then return, so the first record goes to (seed + 1) % numberOfChannels.
        nextChannelToSendTo = (nextChannelToSendTo + 1) % numberOfChannels;
        return nextChannelToSendTo;
    }

}

案例:

/**
 * Example job for {@code rebalance()}: reads text lines from a socket, tags each line with the
 * index of the map subtask that handled it, rebalances the stream, and prints every record
 * together with the index of the sink subtask that received it.
 */
public class _07_Rebalance {
    public static void main(String[] args) throws Exception {
        // Local execution environment with the web UI enabled.
        final StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());

        // Appends the index of the map subtask to each incoming line.
        final RichMapFunction<String, String> tagWithSubtask = new RichMapFunction<String, String>() {
            @Override
            public String map(String value) throws Exception {
                return value + " -> " + getRuntimeContext().getIndexOfThisSubtask();
            }
        };

        // Prints each record together with the index of the sink subtask.
        final RichSinkFunction<String> printWithSubtask = new RichSinkFunction<String>() {
            @Override
            public void invoke(String value, Context context) throws Exception {
                System.out.println(value + " -> " + getRuntimeContext().getIndexOfThisSubtask());
            }
        };

        // socket source -> map (parallelism 1) -> rebalance -> printing sink
        env.socketTextStream("192.168.42.101", 8888)
                .map(tagWithSubtask)
                .setParallelism(1)
                .rebalance()
                .addSink(printWithSubtask);

        env.execute("_07_Rebalance");
    }
}

结果:
a -> 0 -> 0
a -> 0 -> 1
a -> 0 -> 2
a -> 0 -> 3
a -> 0 -> 4
a -> 0 -> 5
a -> 0 -> 6
a -> 0 -> 7
a -> 0 -> 8

3、Rescaling类似于Rebalance,不同点是:在一个TaskManager中轮询(RescalePartitioner)

在这里插入图片描述

4、broadcast案例(将同一个数据拷贝到所有的channel对应的buffer)

案例

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

/**
 * Example job for {@code broadcast()}: reads text lines from a socket, tags each line with the
 * index of the map subtask that handled it, broadcasts the stream, and prints every record
 * together with the index of the sink subtask that received it.
 */
public class _08_Broadcast {
    public static void main(String[] args) throws Exception {
        // Create a local execution environment with the web UI enabled.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
        // Create the source DataStream from a socket.
        DataStreamSource<String> source = env.socketTextStream("192.168.42.101", 8888);

        // Tag each line with the index of the map subtask; the map runs with parallelism 1.
        SingleOutputStreamOperator<String> operator = source.map(new RichMapFunction<String, String>() {
            @Override
            public String map(String value) throws Exception {
                int index = getRuntimeContext().getIndexOfThisSubtask();
                return value + " -> " + index;
            }
        }).setParallelism(1);

        DataStream<String> broadcast = operator.broadcast();

        // Print each record together with the index of the sink subtask that received it.
        broadcast.addSink(new RichSinkFunction<String>() {
            @Override
            public void invoke(String value, Context context) throws Exception {
                int index = getRuntimeContext().getIndexOfThisSubtask();
                System.out.println(value + " -> " + index);
            }
        });
        // Submit the job under this example's own name.
        env.execute("_08_Broadcast");
    }
}
结果:
a -> 0 -> 2
a -> 0 -> 5
a -> 0 -> 1
a -> 0 -> 8
a -> 0 -> 4
a -> 0 -> 3
a -> 0 -> 7
a -> 0 -> 11
a -> 0 -> 0
a -> 0 -> 9
a -> 0 -> 10
a -> 0 -> 6

5、自定义分区器partitionCustom

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.Partitioner;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

/**
 * Example job for {@code partitionCustom()}: reads text lines from a socket, maps each line to a
 * {@code (line, 1)} tuple, routes the tuples with a custom partitioner keyed on the line text,
 * and prints every tuple together with the index of the sink subtask that received it.
 */
public class _09_PartitionCustom {
    public static void main(String[] args) throws Exception {
        // Create a local execution environment with the web UI enabled.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
        // Create the source DataStream from a socket.
        DataStreamSource<String> source = env.socketTextStream("192.168.42.101", 8888);


        // Pair every line with the constant 1; the map runs with parallelism 2.
        SingleOutputStreamOperator<Tuple2<String, Integer>> map = source.map(new MapFunction<String, Tuple2<String, Integer>>() {

            @Override
            public Tuple2<String, Integer> map(String value) throws Exception {
                return Tuple2.of(value, 1);
            }
        }).setParallelism(2);

        /*
         * partitionCustom arguments:
         *   1st: Partitioner - implements the key -> partition index mapping
         *   2nd: KeySelector - extracts the partitioning key from each element
         */
        DataStream<Tuple2<String, Integer>> stream = map.partitionCustom(new Partitioner<String>() {
            @Override
            public int partition(String key, int numPartitions) {
                // Keys starting with "j" go to partition 8, everything else to partition 0.
                int index = 0;
                if (key.startsWith("j")) {
                    // Clamp to the last valid index so a downstream parallelism below 9
                    // never yields an out-of-range partition.
                    index = Math.min(8, numPartitions - 1);
                }
                return index;
            }
        }, new KeySelector<Tuple2<String, Integer>, String>() {
            @Override
            public String getKey(Tuple2<String, Integer> value) throws Exception {
                // The line text (first tuple field) is the partitioning key.
                return value.f0;
            }
        });

        // Print each tuple together with the index of the sink subtask that received it.
        stream.addSink(new RichSinkFunction<Tuple2<String, Integer>>() {
            @Override
            public void invoke(Tuple2<String, Integer> value, Context context) throws Exception {
                int index = getRuntimeContext().getIndexOfThisSubtask();
                System.out.println(value + " -> " + index);
            }
        });
        env.execute("_09_PartitionCustom");

    }
}

结果:
(java,1) -> 8
(jva,1) -> 8
(hello,1) -> 0
  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值