[Flink Streaming Framework] Common Transformation Operations

Ways to obtain a source (built-in):
       From a file: readTextFile()
       From a socket: socketTextStream
       From a collection: fromCollection(Collection)
       Custom source: addSource
              Implement the SourceFunction<> interface, overriding run and cancel: a single-parallelism source (see the sketch after this list)
              Implement the ParallelSourceFunction<> interface: set the parallelism to match the number of Kafka partitions
       Flink's built-in connectors
              Kafka
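
All of the demos below read from a custom MyNoParalleSource, which this post never lists. A minimal sketch of what such a single-parallelism source could look like, assuming it simply emits an increasing counter once per second (for a parallel version, implement ParallelSourceFunction<Long> instead, with the same two methods):

import org.apache.flink.streaming.api.functions.source.SourceFunction;

/**
 * Assumed implementation of the single-parallelism source used by the demos below:
 * emits 1, 2, 3, ... once per second until the job is cancelled.
 */
public class MyNoParalleSource implements SourceFunction<Long> {
    private long count = 1L;
    private volatile boolean isRunning = true;

    @Override
    public void run(SourceContext<Long> ctx) throws Exception {
        while (isRunning) {
            ctx.collect(count); // emit the next number
            count++;
            Thread.sleep(1000); // one element per second
        }
    }

    @Override
    public void cancel() {
        isRunning = false; // stop the emit loop when the job is cancelled
    }
}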
Common transformation operations
       map and filter
       flatMap, keyBy, sum
              one input element to many outputs: flatMap; one to one: map
              keyBy groups the stream by key
       union: merges two data streams into one
       connect, CoMap and CoFlatMap
       split(new OutputSelector{…})
              splits a stream by rules; use select("XXX", …) to pick a split out by its rule name
Common sink operations
       print() / printToErr()
       writeAsText()
       Custom sink, e.g. to Redis (sketched below)
              new FlinkJedisPoolConfig.Builder().setHost()…
              Implement the RedisMapper interface, overriding getKeyFromData, getValueFromData, getCommandDescription
              Choose the Redis data structure via RedisCommand, e.g. RedisCommand.LPUSH
       Flink's built-in connectors -> Kafka, ES
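
A minimal sketch of such a Redis sink, using the flink-connector-redis (Bahir) classes named above; the Tuple2<String, String> element type and the key/value mapping are assumptions for illustration:

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;

// Assumed element type: Tuple2<listName, value>; LPUSH pushes value onto the list.
public class MyRedisMapper implements RedisMapper<Tuple2<String, String>> {
    @Override
    public RedisCommandDescription getCommandDescription() {
        return new RedisCommandDescription(RedisCommand.LPUSH); // choose the data structure
    }

    @Override
    public String getKeyFromData(Tuple2<String, String> data) {
        return data.f0; // the Redis key (list name)
    }

    @Override
    public String getValueFromData(Tuple2<String, String> data) {
        return data.f1; // the value to push
    }
}

Wiring it into a job (host and port match the redis-cli example below):

FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder()
        .setHost("192.168.167.254").setPort(6379).build();
dataStream.addSink(new RedisSink<>(conf, new MyRedisMapper()));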

Connecting with the Redis CLI:
redis-cli -h 192.168.167.254 -p 6379

map and filter

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * Source: 1 2 3 4 5 ... arriving continuously
 * Use map to print each element as it is received
 * Use filter to keep only the even numbers
 */
public class MapDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<Long> numberStream = env.addSource(new MyNoParalleSource()).setParallelism(1);
        SingleOutputStreamOperator<Long> dataStream = numberStream.map(new MapFunction<Long, Long>() {
            @Override
            public Long map(Long value) throws Exception {
                System.out.println("Received value: " + value);
                return value;
            }
        });
        SingleOutputStreamOperator<Long> filterDataStream = dataStream.filter(new FilterFunction<Long>() {
            @Override
            public boolean filter(Long number) throws Exception {
                return number % 2 == 0; // keep even numbers only
            }
        });

        filterDataStream.print().setParallelism(1);
        env.execute("StreamingDemoWithMyNoPralalleSource");
    }
}

flatMap, keyBy and sum

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

/**
 * Word count over a sliding window
 * Source: socket
 * Requirement: every 1 second, count word occurrences over the last 2 seconds
 *
 * flatMap
 * keyBy:
 *    dataStream.keyBy("someKey") // group by the "someKey" field of the objects
 *    dataStream.keyBy(0)         // group by the first element of the Tuple
 * sum
 */
public class WindowWordCountJava {
    public static void main(String[] args) throws Exception {
        int port;
        try {
            ParameterTool parameterTool = ParameterTool.fromArgs(args);
            port = parameterTool.getInt("port");
        } catch (Exception e) {
            System.err.println("no port set, using default port 9988");
            port = 9988;
        }
        // Step 1: get the Flink (streaming) execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String hostname = "10.126.88.226";
        String delimiter = "\n";
        // Step 2: get the source
        DataStreamSource<String> textStream = env.socketTextStream(hostname, port, delimiter);
        // Step 3: apply the transformations
        SingleOutputStreamOperator<WordCount> wordCountStream = textStream.flatMap(new FlatMapFunction<String, WordCount>() {
            public void flatMap(String line, Collector<WordCount> out) throws Exception {
                String[] fields = line.split("\t");
                for (String word : fields) {
                    out.collect(new WordCount(word, 1L));
                }
            }
        }).keyBy("word")
                .timeWindow(Time.seconds(2), Time.seconds(1)) // every 1s, over the last 2s
                .sum("count");

        wordCountStream.print().setParallelism(1); // print with parallelism 1
        // Step 4: run the job
        env.execute("socket word count");
    }

    public static class WordCount {
        public String word;
        public long count;

        public WordCount() {
        }

        public WordCount(String word, long count) {
            this.word = word;
            this.count = count;
        }

        @Override
        public String toString() {
            return "WordCount{" +
                    "word='" + word + '\'' +
                    ", count=" + count +
                    '}';
        }
    }
}

union

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;

/**
 * Merges multiple streams; the new stream contains the data of all input streams.
 * One restriction of union: all merged streams must have the same type.
 */
public class unionDemo {
    public static void main(String[] args) throws Exception {
        // Get the Flink execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Get the sources
        DataStreamSource<Long> text1 = env.addSource(new MyNoParalleSource()).setParallelism(1); // note: this source only supports parallelism 1
        DataStreamSource<Long> text2 = env.addSource(new MyNoParalleSource()).setParallelism(1);

        // Merge text1 and text2 into one stream
        DataStream<Long> text = text1.union(text2);

        DataStream<Long> num = text.map(new MapFunction<Long, Long>() {
            @Override
            public Long map(Long value) throws Exception {
                System.out.println("Received raw value: " + value);
                return value;
            }
        });
        // Process the data every 2 seconds
        DataStream<Long> sum = num.timeWindowAll(Time.seconds(2)).sum(0);
        // Print the result
        sum.print().setParallelism(1);
        String jobName = unionDemo.class.getSimpleName();
        env.execute(jobName);
    }
}

connect, CoMap and CoFlatMap

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.ConnectedStreams;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoMapFunction;

/**
 * Similar to union, but connect can only join two streams; the two streams may have
 * different types, and each stream is processed with its own method (map1/map2).
 */
public class ConnectionDemo {
    public static void main(String[] args) throws Exception {
        // Get the Flink execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Get the sources
        DataStreamSource<Long> text1 = env.addSource(new MyNoParalleSource()).setParallelism(1); // note: this source only supports parallelism 1

        DataStreamSource<Long> text2 = env.addSource(new MyNoParalleSource()).setParallelism(1);
        SingleOutputStreamOperator<String> text2_str = text2.map(new MapFunction<Long, String>() {
            @Override
            public String map(Long value) throws Exception {
                return "str_" + value;
            }
        });

        ConnectedStreams<Long, String> connectStream = text1.connect(text2_str);

        SingleOutputStreamOperator<Object> result = connectStream.map(new CoMapFunction<Long, String, Object>() {
            @Override
            public Object map1(Long value) throws Exception {
                return value; // elements from text1
            }

            @Override
            public Object map2(String value) throws Exception {
                return value; // elements from text2_str
            }
        });

        // Print the result
        result.print().setParallelism(1);
        String jobName = ConnectionDemo.class.getSimpleName();
        env.execute(jobName);
    }
}
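
The heading also names CoFlatMap, which the demo does not show. A minimal sketch of the flatMap counterpart, reusing connectStream from the demo above (it additionally needs org.apache.flink.streaming.api.functions.co.CoFlatMapFunction and org.apache.flink.util.Collector); the splitting of "str_1" into tokens is purely illustrative:

// Unlike CoMapFunction, each input element may produce zero or more outputs.
SingleOutputStreamOperator<Object> flatResult = connectStream.flatMap(new CoFlatMapFunction<Long, String, Object>() {
    @Override
    public void flatMap1(Long value, Collector<Object> out) throws Exception {
        out.collect(value); // forward elements of the first stream as-is
    }

    @Override
    public void flatMap2(String value, Collector<Object> out) throws Exception {
        for (String token : value.split("_")) {
            out.collect(token); // e.g. split "str_1" into "str" and "1"
        }
    }
});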

split and select

import java.util.ArrayList;

import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SplitStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * Splits one data stream into multiple streams according to some rules.
 * Use case: in practice the source stream may mix several kinds of data that
 * require different processing rules, so you can split it into several streams
 * and apply different processing logic to each of them.
 */
public class SplitDemo {
    public static void main(String[] args) throws Exception {
        // Get the Flink execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Get the source
        DataStreamSource<Long> text = env.addSource(new MyNoParalleSource()).setParallelism(1); // note: this source only supports parallelism 1
        // Split the stream by the parity of its elements
        SplitStream<Long> splitStream = text.split(new OutputSelector<Long>() {
            @Override
            public Iterable<String> select(Long value) {
                ArrayList<String> output = new ArrayList<>();
                if (value % 2 == 0) {
                    output.add("even");
                } else {
                    output.add("odd");
                }
                return output;
            }
        });

        // Select one or more of the split streams
        DataStream<Long> evenStream = splitStream.select("even");
        DataStream<Long> oddStream = splitStream.select("odd");
        DataStream<Long> moreStream = splitStream.select("odd", "even");

        // Print the result
        evenStream.print().setParallelism(1);
        String jobName = SplitDemo.class.getSimpleName();
        env.execute(jobName);
    }
}
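
Note that split/select was deprecated and later removed in newer Flink releases in favor of side outputs. A minimal sketch of the equivalent using a ProcessFunction with an OutputTag (the tag name and routing logic mirror the parity example above):

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

public class SideOutputDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<Long> text = env.addSource(new MyNoParalleSource()).setParallelism(1);

        // Tag for the side output carrying the odd numbers; the anonymous
        // subclass preserves the generic type information.
        final OutputTag<Long> oddTag = new OutputTag<Long>("odd") {};

        SingleOutputStreamOperator<Long> evenStream = text.process(new ProcessFunction<Long, Long>() {
            @Override
            public void processElement(Long value, Context ctx, Collector<Long> out) {
                if (value % 2 == 0) {
                    out.collect(value);        // even -> main output
                } else {
                    ctx.output(oddTag, value); // odd  -> side output
                }
            }
        });

        DataStream<Long> oddStream = evenStream.getSideOutput(oddTag);

        evenStream.print().setParallelism(1);
        env.execute(SideOutputDemo.class.getSimpleName());
    }
}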