keyBy的4种用法

上篇:flatMap底层实现

第一种方式,下标类型

代码实现:

package cn._51doit.flink.day02;

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * keyBy usage, variant 1: positional (tuple-index) keys.
 * Index-based keyBy only applies to Tuple types (unbounded stream data).
 * Function: under `nc -lk 8888` on this node, counts input words per key.
 * E.g. a repeated word accumulates a running count starting at 1; a new
 * word starts its own count at 1.
 */
public class KeyedDemo01 {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // Local environment with the Flink web UI enabled for inspection.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);

        // Source: one word per line from socket host "Master", port 8888.
        DataStreamSource<String> words = env.socketTextStream("Master", 8888);

        // Map each word to (word, 1). returns(...) is required because the
        // lambda erases the tuple's generic type information.
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = words.map(w -> Tuple2.of(w, 1)).returns(Types.TUPLE(Types.STRING,Types.INT));

        // Deprecated positional keyBy on indices 0 and 1 (word and count);
        // sum(1) keeps a running total per key and print() emits each update.
        KeyedStream<Tuple2<String, Integer>, Tuple> keyed = wordAndOne.keyBy(0,1);
        keyed.sum(1).print();

        env.execute();
    }
}

打印输出 


第二种方式,按条件类型(过时了)

代码:统计金额

package cn._51doit.flink.day02;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;

import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * keyBy usage, variant 2: positional (tuple-index) keys on multiple fields
 * (deprecated API) over an unbounded stream.
 *
 * Function: under `nc -lk 8888` on this node, reads "province,city,amount"
 * lines, keys by (province, city) and keeps a running sum of the amount.
 * E.g. input: 上海市,浦东新区,1000  /  上海市,青浦区,500
 *
 * Note: the split pattern accepts both the ASCII comma "," and the
 * full-width Chinese comma ",", so lines typed with a Chinese IME parse
 * correctly instead of failing.
 */
public class KeyedDemo02 {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);

        // Source lines: province, city, amount.
        DataStreamSource<String> lines = env.socketTextStream("Master", 8888);

        SingleOutputStreamOperator<Tuple3<String, String, Integer>> provinceCityAndMoney = lines.map(new MapFunction<String, Tuple3<String, String, Integer>>() {

            @Override
            public Tuple3<String, String, Integer> map(String value) throws Exception {
                // Accept both ASCII "," and full-width "," as field separators.
                String[] fields = value.split("[,,]");
                if (fields.length < 3) {
                    // Fail fast with a clear message instead of an
                    // ArrayIndexOutOfBoundsException on malformed input.
                    throw new IllegalArgumentException("Expected 'province,city,amount', got: " + value);
                }
                return Tuple3.of(fields[0], fields[1], Integer.parseInt(fields[2].trim()));
            }
        });

        // Deprecated positional keyBy: key on fields 0 (province) and 1 (city).
        KeyedStream<Tuple3<String, String, Integer>, Tuple> keyed = provinceCityAndMoney.keyBy(0, 1);

        // Running sum of field 2 (amount) per (province, city).
        keyed.sum(2).print();

        env.execute();
    }
}

控制台打印输出

改造之后方式1:用KeySelector把Tuple3中的两个字段拼接成单个String作为key

package cn._51doit.flink.day03;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;

import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * keyBy with a KeySelector that concatenates two fields into one String key.
 * Reads "province,city,amount" lines from the socket and keeps a running sum
 * of the amount per (province + city) key.
 */
public class KeyedDemo02 {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf);

        // Source lines: province, city, amount.
        DataStreamSource<String> source = env.socketTextStream("Master", 8888);

        // Parse each line into a (province, city, amount) triple.
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> parsed = source.map(new MapFunction<String, Tuple3<String, String, Integer>>() {
            @Override
            public Tuple3<String, String, Integer> map(String line) throws Exception {
                String[] parts = line.split(",");
                return Tuple3.of(parts[0], parts[1], Integer.parseInt(parts[2]));
            }
        });

        // Key = province concatenated with city, as a single String.
        KeyedStream<Tuple3<String, String, Integer>, String> keyed = parsed.keyBy(new KeySelector<Tuple3<String, String, Integer>, String>() {
            @Override
            public String getKey(Tuple3<String, String, Integer> t) throws Exception {
                return t.f0 + t.f1;
            }
        });

        // Running sum of field 2 (amount) per key.
        keyed.sum(2).print();

        env.execute();
    }
}

改造之后的方式2:从Tuple3中取出两个字段,组合成Tuple2作为key

package cn._51doit.flink.day03;


import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;

import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * keyBy with a KeySelector returning a Tuple2 composite key (province, city).
 * Reads "province,city,amount" lines from the socket and keeps a running sum
 * of the amount per composite key.
 */
public class KeyedDemo03 {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf);

        // Source lines: province, city, amount.
        DataStreamSource<String> source = env.socketTextStream("Master", 8888);

        // Parse each line into a (province, city, amount) triple.
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> parsed = source.map(new MapFunction<String, Tuple3<String, String, Integer>>() {
            @Override
            public Tuple3<String, String, Integer> map(String line) throws Exception {
                String[] parts = line.split(",");
                return Tuple3.of(parts[0], parts[1], Integer.parseInt(parts[2]));
            }
        });

        // Composite key: (province, city) packed into a Tuple2 — unlike string
        // concatenation, this cannot confuse distinct (f0, f1) pairs.
        KeyedStream<Tuple3<String, String, Integer>, Tuple2<String, String>> keyed = parsed.keyBy(new KeySelector<Tuple3<String, String, Integer>, Tuple2<String, String>>() {
            @Override
            public Tuple2<String, String> getKey(Tuple3<String, String, Integer> t) throws Exception {
                return Tuple2.of(t.f0, t.f1);
            }
        });

        // Running sum of field 2 (amount) per (province, city).
        keyed.sum(2).print();

        env.execute();
    }
}


第三种方式,采用f0字段

为什么采用f0可以?

因为SingleOutputStreamOperator的泛型参数是Tuple2,而点进Tuple2的源码可以看到它是一个类,其中定义了名为f0、f1的公共字段,所以keyBy可以直接按字段名引用它们

/** Field 0 of the tuple. */
	public T0 f0;
	/** Field 1 of the tuple. */
	public T1 f1;


代码实现:

package cn._51doit.flink.day03;

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * keyBy usage, variant 3: key by the public field name "f0"
 * (unbounded stream data).
 *
 * Why does "f0" work? Tuple2 is a class that declares public fields named
 * f0 and f1 (see the Tuple field declarations quoted above), so Flink can
 * key on them by name.
 *
 * Function: under `nc -lk 8888` on this node, each whole input line becomes
 * the key. E.g. entering "hadoop spark flink" on one line counts that entire
 * line as a single key — enter one word per line to count individual words.
 */
public class KeyedDemo04 {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);

        // Source: one word per line from socket host "Master", port 8888.
        DataStreamSource<String> words = env.socketTextStream("Master", 8888);

        // (word, 1); returns(...) restores the tuple type info erased by the lambda.
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = words.map(w -> Tuple2.of(w, 1)).returns(Types.TUPLE(Types.STRING,Types.INT));

        // Deprecated field-name keyBy: key on "f0" (the word); sum(1)
        // accumulates the running count per key.
        KeyedStream<Tuple2<String, Integer>, Tuple> keyed = wordAndOne.keyBy("f0");
        keyed.sum(1).print();

        env.execute();
    }
}

控制台打印输出:


第四种方式,f0、f1字段一起使用

代码实现:

package cn._51doit.flink.day03;

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * keyBy usage, variant 4: key by field names "f0" and "f1" together
 * (unbounded stream data).
 *
 * Function: under `nc -lk 8888` on this node, each line becomes (line, 1);
 * keyBy("f0","f1") groups by the word AND the count, and sum(1) accumulates
 * the count within each group. Since the count is always 1 here, this
 * behaves the same as keying on the word alone.
 */
public class KeyedDemo05 {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);

        // Source: one word per line from socket host "Master", port 8888.
        DataStreamSource<String> words = env.socketTextStream("Master", 8888);

        // (word, 1); returns(...) restores the tuple type info erased by the lambda.
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = words.map(w -> Tuple2.of(w, 1)).returns(Types.TUPLE(Types.STRING,Types.INT));

        // Deprecated field-name keyBy on both "f0" and "f1"; sum(1) keeps
        // a running total per (f0, f1) group.
        KeyedStream<Tuple2<String, Integer>, Tuple> keyed = wordAndOne.keyBy("f0","f1");
        keyed.sum(1).print();

        env.execute();
    }
}

控制台打印输出:

  • 2
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 4
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值