Flink-入门编程及算子介绍

编程模型

在这里插入图片描述
Flink提供了不同级别的编程抽象。通过在抽象数据集上调用算子构建DataFlow,就可以实现对分布式数据的流式计算和离线计算:DataSet是批处理的抽象数据集,DataStream是流式计算的抽象数据集。它们的方法大致都分为Source、Transformation、Sink三类:

  • Source主要负责数据的读取
  • Transformation主要负责对数据的转换操作
  • Sink负责最终计算好的结果数据输出。

DataStream实时wordcount

package com.wedoctor.flink
import org.apache.flink.streaming.api.scala._

object WordCountDemo {

  /** Streaming word count: reads lines from a socket, splits them into
    * words and prints a continuously updated count per word. */
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Source: one text line per socket message.
    val socketLines: DataStream[String] = env.socketTextStream("192.168.xx.xx", 9999)
    // Transformation: line -> words -> (word, 1) -> keyed running sum.
    val counted: DataStream[(String, Int)] = socketLines
      .flatMap(_.split(" "))
      .map(word => (word, 1))
      .keyBy(_._1)
      .sum(1)
    // Sink: print the running totals.
    counted.print()
    env.execute("Flink WordCount")
  }
}

DataSet 离线wordcount

package com.wedoctor.flink
import org.apache.flink.api.scala._
object WordCountDemo2 {
  /** Batch (DataSet) word count.
    *
    * The original snippet only doubled three integers, which did not match
    * the section title "DataSet 离线wordcount"; this version actually counts
    * words in a small in-memory data set.
    */
  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment
    // Source: a bounded, in-memory collection of text lines.
    val lines: DataSet[String] = env.fromElements("flink spark flink", "hadoop flink")
    // Transformation: split into words, pair with 1, group by word, sum the counts.
    val counts: DataSet[(String, Int)] = lines
      .flatMap(_.split(" "))
      .map((_, 1))
      .groupBy(0)
      .sum(1)
    // Sink: print triggers execution for the DataSet API, so no env.execute() is needed.
    counts.print()
  }
}

Flink常见算子

1.map

package com.wedoctor.flink;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class MapTest {
    /** Demonstrates the map operator: upper-cases every line read from a socket. */
    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> source = env.socketTextStream("192.168.xx.xx", 9999);
        // One-to-one transformation: each input line yields exactly one output line.
        SingleOutputStreamOperator<String> upperCased = source.map(new MapFunction<String, String>() {
            @Override
            public String map(String line) throws Exception {
                return line.toUpperCase();
            }
        });
        upperCased.print();
        env.execute();
    }
}

2.RichMapFunction

package com.wedoctor.flink;

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class RichMapTest {

    // RichMapFunction advantages over a plain MapFunction:
    // 1. access to the runtime context (subtask index, state, ...)
    // 2. the open()/close() lifecycle hooks
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> source = env.socketTextStream("192.168.XX.XX", 9999);
        SingleOutputStreamOperator<String> enriched = source.map(new RichMapFunction<String, String>() {
            // Runs once per subtask, after construction and before the first map() call.
            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                // A good place to open external connections.
            }

            // Per-record transformation.
            @Override
            public String map(String line) throws Exception {
                return line + "222222222";
            }

            // Runs once per subtask just before it stops.
            @Override
            public void close() throws Exception {
                super.close();
                // A good place to release external connections.
            }
        });
        enriched.print();
        env.execute();
    }
}

3.flatMap

package com.wedoctor.flink;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class FlatMapTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> lines = env.socketTextStream("192.168.xx.xx", 9999);
        SingleOutputStreamOperator<String> flatMap = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String value, Collector<String> collector) throws Exception {
                String[] words = value.split(" ");
                for (String word : words) {
                    collector.collect(word);
                }
            }
        });
        flatMap.print();
        env.execute();
    }
}

4.filter

package com.wedoctor.flink;

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

// Demonstrates the filter operator: keeps only the lines whose length is exactly 2.
// FIX: the original snippet reused the class name RichMapTest from the previous
// example (a copy-paste defect); renamed to FilterTest to match the operator shown.
public class FilterTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> lines = env.socketTextStream("192.168.xx.xx", 9999);
        // A record is kept when filter() returns true and dropped otherwise.
        SingleOutputStreamOperator<String> filtered = lines.filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String value) throws Exception {
                return value.length() == 2;
            }
        });
        filtered.print();
        env.execute();
    }
}

5.keyBy

5.1.单个字段keyby

package com.wedoctor.flink;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class KeyByDemo {
    // Streaming word-count fragment showing two ways of keying a
    // Tuple2<word, 1> stream by its first field.
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> lines = env.socketTextStream("192.168.xx.xx", 9999);
        // Split each line into words and emit one (word, 1) pair per word.
        SingleOutputStreamOperator<Tuple2<String, Integer>> flatMap = lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) throws Exception {
                String[] words = s.split(" ");
                for (String word : words) {
                    collector.collect(Tuple2.of(word, 1));
                }
            }
        });
        // Key by a single field, two variants:
        // Variant 1: position-based keyBy(0) — deprecated in newer Flink versions.
        KeyedStream<Tuple2<String, Integer>, Tuple> keyBy = flatMap.keyBy(0);
        // Variant 2: KeySelector lambda — the recommended, type-safe form.
        KeyedStream<Tuple2<String, Integer>, String> keyBy1 = flatMap.keyBy(t -> t.f0);
        // Both keyed streams are printed, so every input record appears twice in the output.
        keyBy.print();
        keyBy1.print();
        env.execute();
    }
}

5.2 多个字段keyBy(过时API)

package com.wedoctor.flink;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KeyByDemo {
    // Groups by TWO tuple fields with the position-based keyBy(int, int) API
    // (deprecated in newer Flink versions, as the section heading notes)
    // and sums the order count per (userId, monthId) pair.
    public static void main(String[] args) throws Exception {
       // Sample input line: "jack 01 1232" -> (userId, monthId, orderCnt)
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> lines = env.socketTextStream("192.168.xx.xx", 9999);
        // Parse each space-separated line into a Tuple3.
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> map = lines.map(new MapFunction<String, Tuple3<String, String, Integer>>() {
            @Override
            public Tuple3<String, String, Integer> map(String s) throws Exception {
                String[] words = s.split(" ");
                String userId = words[0];
                String monthId = words[1];
                Integer orderCnt = Integer.parseInt(words[2]);  // throws NumberFormatException on malformed input
                return Tuple3.of(userId, monthId, orderCnt);
            }
        });
        // Composite key on fields 0 and 1 (userId, monthId).
        KeyedStream<Tuple3<String, String, Integer>, Tuple> key = map.keyBy(0, 1);
        // Running sum of field 2 (orderCnt) per composite key.
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> summed = key.sum(2);
        summed.print();
        env.execute();
    }
}

5.3.多个字段KeyBy(新API,KeySelector拼接Key)

package com.wedoctor.flink;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KeyByDemo {
    // Groups by two fields (userId, monthId) via a KeySelector and sums orderCnt.
    public static void main(String[] args) throws Exception {
       // Sample input line: "jack 01 1232" -> (userId, monthId, orderCnt)
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> lines = env.socketTextStream("192.168.xx.xx", 9999);
        // Parse each space-separated line into a Tuple3.
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> map = lines.map(new MapFunction<String, Tuple3<String, String, Integer>>() {
            @Override
            public Tuple3<String, String, Integer> map(String s) throws Exception {
                String[] words = s.split(" ");
                String userId = words[0];
                String monthId = words[1];
                Integer orderCnt = Integer.parseInt(words[2]);
                return Tuple3.of(userId, monthId, orderCnt);
            }
        });
        // BUG FIX: the original key was t.f0 + t.f1 — raw concatenation makes
        // distinct key pairs collide (e.g. ("ab","c") and ("a","bc") both
        // become "abc"). Join the fields with a separator instead; a space is
        // safe here because the fields come from a space-split line and can
        // never contain one.
        KeyedStream<Tuple3<String, String, Integer>, String> keyBy = map.keyBy(t -> t.f0 + " " + t.f1);
        // Running sum of field 2 (orderCnt) per composite key.
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> summed = keyBy.sum(2);
        summed.print();
        env.execute();
    }
}

5.4 多个字段KeyBy(POJO封装,终极)

package com.wedoctor.flink;

public class WordCount {
    public String word;
    public Integer count;
    public WordCount(String word, Integer count) {
        this.word = word;
        this.count = count;
    }
    public WordCount() {
    }
    public static WordCount of(String word,Integer count){
        return new WordCount(word,count);
    }
    @Override
    public String toString() {
        return "WordCount{" +
                "word='" + word + '\'' +
                ", count=" + count +
                '}';
    }
}
package com.wedoctor.flink;

// NOTE(review): this class is an exact duplicate of the WordCount POJO pasted
// directly above in the article; keep only one copy when extracting the code.
public class WordCount {
    public String word;   // the word being counted
    public Integer count; // number of occurrences
    public WordCount(String word, Integer count) {
        this.word = word;
        this.count = count;
    }
    // No-arg constructor as required by Flink's POJO rules.
    public WordCount() {
    }
    // Convenience factory mirroring Tuple2.of(...).
    public static WordCount of(String word,Integer count){
        return new WordCount(word,count);
    }
    @Override
    public String toString() {
        return "WordCount{" +
                "word='" + word + '\'' +
                ", count=" + count +
                '}';
    }
}

6.reduce

package com.wedoctor.flink;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class ReduceDemo {
    /** Streaming word count implemented with keyBy + reduce instead of sum. */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> source = env.socketTextStream("192.168.xx.xx", 9999);
        // line -> one (word, 1) pair per word
        SingleOutputStreamOperator<Tuple2<String, Integer>> pairs = source.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
                for (String word : line.split(" ")) {
                    out.collect(Tuple2.of(word, 1));
                }
            }
        });
        // Fold records sharing a word into a running (word, total) record.
        SingleOutputStreamOperator<Tuple2<String, Integer>> totals = pairs
                .keyBy(t -> t.f0)
                .reduce((acc, cur) -> Tuple2.of(acc.f0, acc.f1 + cur.f1));
        totals.print();
        env.execute();
    }
}

7.Aggregations

7.1 sum

package com.wedoctor.flink;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KeyByDemo {
    // sum aggregation demo: groups by (userId, monthId) and sums orderCnt.
    public static void main(String[] args) throws Exception {
       // Sample input line: "jack 01 1232" -> (userId, monthId, orderCnt)
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> lines = env.socketTextStream("192.168.xx.xx", 9999);
        // Parse each space-separated line into a Tuple3.
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> map = lines.map(new MapFunction<String, Tuple3<String, String, Integer>>() {
            @Override
            public Tuple3<String, String, Integer> map(String s) throws Exception {
                String[] words = s.split(" ");
                String userId = words[0];
                String monthId = words[1];
                Integer orderCnt = Integer.parseInt(words[2]);
                return Tuple3.of(userId, monthId, orderCnt);
            }
        });
        // BUG FIX: the original key was t.f0 + t.f1 — raw concatenation makes
        // distinct key pairs collide (e.g. ("ab","c") and ("a","bc") both
        // become "abc"). Join with a space, which can never occur inside the
        // space-split fields.
        KeyedStream<Tuple3<String, String, Integer>, String> keyBy = map.keyBy(t -> t.f0 + " " + t.f1);
        // Running sum of field 2 (orderCnt) per composite key.
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> summed = keyBy.sum(2);
        summed.print();
        env.execute();
    }
}

7.2 min

package com.wedoctor.flink;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class AggDemo {
    /** Demonstrates the min aggregation on a keyed (word, count) stream. */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> source = env.socketTextStream("192.168.x.xx", 9999);
        // Each input line is "word count"; parse it into a Tuple2.
        SingleOutputStreamOperator<Tuple2<String, Integer>> parsed = source.map(new MapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> map(String line) throws Exception {
                String[] fields = line.split(" ");
                return Tuple2.of(fields[0], Integer.parseInt(fields[1]));
            }
        });
        // Key by the word and keep a running minimum of the count field.
        KeyedStream<Tuple2<String, Integer>, String> keyed = parsed.keyBy(t -> t.f0);
        keyed.min(1).print();
        env.execute();
    }
}

7.3 max

package com.wedoctor.flink;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class AggDemo {
    /** Demonstrates the max aggregation on a keyed (word, count) stream. */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> source = env.socketTextStream("192.168.xx.xx", 9999);
        // Each input line is "word count"; parse it into a Tuple2.
        SingleOutputStreamOperator<Tuple2<String, Integer>> parsed = source.map(new MapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> map(String line) throws Exception {
                String[] fields = line.split(" ");
                return Tuple2.of(fields[0], Integer.parseInt(fields[1]));
            }
        });
        // Key by the word and keep a running maximum of the count field.
        KeyedStream<Tuple2<String, Integer>, String> keyed = parsed.keyBy(t -> t.f0);
        keyed.max(1).print();
        env.execute();
    }
}

7.4 minBy

package com.wedoctor.flink;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class AggDemo {
    // Sample input lines ("userId monthId orderCnt"):
    //lucy 2020-05 15
    //jack 2020-02 25
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> lines = env.socketTextStream("192.168.x.xx", 9999);
        // Parse each space-separated line into (userId, monthId, orderCnt).
        SingleOutputStreamOperator<Tuple3<String,String, Integer>> map = lines.map(new MapFunction<String, Tuple3<String,String, Integer>>() {
            @Override
            public Tuple3<String, String,Integer> map(String s) throws Exception {
                String[] fileds = s.split(" ");
                String userId = fileds[0];
                String monthId = fileds[1];
                int orderCnt = Integer.parseInt(fileds[2]);
                return Tuple3.of(userId,monthId,orderCnt);
            }
        });
        // Key by userId only; minBy keeps the WHOLE record that holds the
        // minimum of field 2 (unlike min, which only tracks the field value).
        KeyedStream<Tuple3<String, String, Integer>, String> keyBy = map.keyBy(t -> t.f0);
        // Second argument `first=false`: on ties, return the later element
        // rather than the first one seen (per the Flink minBy API docs).
        keyBy.minBy(2,false).print();
        env.execute();
    }
}

7.5 maxBy

package com.wedoctor.flink;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class AggDemo {
    // Sample input lines ("userId monthId orderCnt"):
    //lucy 2020-05 15
    //jack 2020-02 25
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> lines = env.socketTextStream("192.168.xx.xx", 9999);
        // Parse each space-separated line into (userId, monthId, orderCnt).
        SingleOutputStreamOperator<Tuple3<String,String, Integer>> map = lines.map(new MapFunction<String, Tuple3<String,String, Integer>>() {
            @Override
            public Tuple3<String, String,Integer> map(String s) throws Exception {
                String[] fileds = s.split(" ");
                String userId = fileds[0];
                String monthId = fileds[1];
                int orderCnt = Integer.parseInt(fileds[2]);
                return Tuple3.of(userId,monthId,orderCnt);
            }
        });
        // Key by userId only; maxBy keeps the WHOLE record that holds the
        // maximum of field 2 (unlike max, which only tracks the field value).
        KeyedStream<Tuple3<String, String, Integer>, String> keyBy = map.keyBy(t -> t.f0);
        // Second argument `first=false`: on ties, return the later element
        // rather than the first one seen (per the Flink maxBy API docs).
        keyBy.maxBy(2,false).print();
        env.execute();
    }
}

8 union

package com.wedoctor.flink;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
public class UnionDemo {
    /** Merges two bounded integer streams with union and prints the result. */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Two source streams built from in-memory elements.
        DataStreamSource<Integer> first = env.fromElements(1, 2, 3, 4, 5);
        DataStreamSource<Integer> second = env.fromElements(5, 7, 8, 9, 10);
        // union emits every element of both inputs; duplicates (here: 5) are kept.
        DataStream<Integer> merged = first.union(second);
        merged.print();
        env.execute();
    }
}

DataSet API
在这里插入图片描述
DataStream API
DataStream API与DataSet API的区别在于:DataStream的输入是一个无限的数据流
在这里插入图片描述
DataStream独有的API
在这里插入图片描述

参考
https://mp.weixin.qq.com/s?__biz=MzIxMjI3NTI5OQ==&mid=2650461709&idx=1&sn=b6f027e02ae9632a38766b5243c4ed32&chksm=8f46ef01b831661733e1c2f78e7f50fd2d806b9032a1b110b60692dd242700f41e90b7d6f474&scene=21#wechat_redirect
Flink DataStream常用算子
Flink的DataSet基本算子总结
Flink系列:常用算子一览表
官网地址

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值