map: takes one element and returns exactly one element; typically used for record-level cleaning and transformation
flatMap: takes one element and returns zero, one, or more elements
filter: a predicate applied to each record; only records that satisfy the condition are kept
keyBy: logically partitions the stream by the given key; records with the same key are routed to the same partition
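The per-record semantics of these three operators can be illustrated outside Flink with plain java.util.stream calls (a sketch only; Flink applies the same per-element behavior to unbounded streams):

```java
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class OperatorSemantics {
    public static void main(String[] args) {
        List<Long> input = Arrays.asList(1L, 2L, 3L);

        // map: one element in, one element out (1:1 transformation)
        List<String> mapped = input.stream()
                .map(v -> v + "_str")
                .collect(Collectors.toList());
        System.out.println(mapped); // [1_str, 2_str, 3_str]

        // flatMap: one element in, zero or more elements out
        List<Long> flatMapped = input.stream()
                .flatMap(v -> Stream.of(v, v * 10))
                .collect(Collectors.toList());
        System.out.println(flatMapped); // [1, 10, 2, 20, 3, 30]

        // filter: keep only the elements that satisfy the predicate
        List<Long> filtered = input.stream()
                .filter(v -> v % 2 == 0)
                .collect(Collectors.toList());
        System.out.println(filtered); // [2]
    }
}
```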
Two typical usages:
dataStream.keyBy("someKey") // use the "someKey" field of the object as the grouping key
dataStream.keyBy(0) // use the first element of the Tuple as the grouping key
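The grouping behavior of keyBy can be sketched with a plain-Java hash-based grouping (an illustration of the semantics only; in Flink, the key's hash actually selects which parallel partition of the downstream operator receives the record):

```java
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class KeyBySemantics {
    public static void main(String[] args) {
        // Each record is a (word, count) pair, modeled here as a String[].
        List<String[]> records = Arrays.asList(
                new String[]{"a", "1"},
                new String[]{"b", "2"},
                new String[]{"a", "3"});

        // keyBy(0): records with the same first field end up in the same
        // group, just as they end up in the same partition in Flink.
        Map<String, List<String[]>> grouped = records.stream()
                .collect(Collectors.groupingBy(r -> r[0]));

        System.out.println(grouped.get("a").size()); // 2
        System.out.println(grouped.get("b").size()); // 1
    }
}
```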
Note: the following types cannot be used as keys:
1. a POJO that does not override hashCode() and relies on Object's hashCode() implementation
2. an array of any type
reduce: a rolling aggregation; combines the current element with the previously reduced value and emits the new result
aggregations: built-in rolling aggregations such as sum(), min(), max()
Union: merges two or more streams into a new stream containing all of their elements; the restriction is that all merged streams must have the same type.
Connect: similar to union, but only connects two streams; the two streams may have different element types, and a different processing function is applied to each.
CoMap, CoFlatMap: the counterparts of map and flatMap used on ConnectedStreams
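The rolling nature of reduce (and of sum(), which is a special case of it) can be sketched with a plain-Java per-key fold that keeps the last reduced value for each key (a sketch of the semantics, not Flink code):

```java
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ReduceSemantics {
    public static void main(String[] args) {
        // Keyed records: (key, value) pairs arriving one by one.
        List<long[]> stream = Arrays.asList(
                new long[]{1, 5}, new long[]{1, 3}, new long[]{2, 7}, new long[]{1, 2});

        // For each key, combine the incoming value with the previous
        // reduce result and emit the new result.
        Map<Long, Long> state = new HashMap<>();
        for (long[] record : stream) {
            // The reduce function here is addition, i.e. the same as sum().
            state.merge(record[0], record[1], Long::sum);
            System.out.println("key=" + record[0] + " result=" + state.get(record[0]));
        }
        // Final state: key 1 -> 10, key 2 -> 7
    }
}
```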
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.ConnectedStreams;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoMapFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

public class TestConnect {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<Long> text1 = env.addSource(new NoParallelSource());
        DataStream<Long> text2 = env.addSource(new NoParallelSource());
        // union would require identical element types:
        // DataStream<Long> text = text1.union(text2);

        // Turn the first stream into Strings so the two streams have different types.
        SingleOutputStreamOperator<String> num = text1.map(new MapFunction<Long, String>() {
            @Override
            public String map(Long value) throws Exception {
                return value + "_str";
            }
        });

        // connect allows different element types; the CoMapFunction applies
        // map1 to elements of the first stream and map2 to the second.
        ConnectedStreams<String, Long> connect = num.connect(text2);
        SingleOutputStreamOperator<Object> map = connect.map(new CoMapFunction<String, Long, Object>() {
            @Override
            public Object map1(String value) throws Exception {
                return value;
            }

            @Override
            public Object map2(Long value) throws Exception {
                return value;
            }
        });

        map.print().setParallelism(1);
        String name = TestConnect.class.getSimpleName();
        env.execute(name);
    }

    // A non-parallel source that emits an increasing Long once per second.
    public static class NoParallelSource implements SourceFunction<Long> {
        private long count = 1;
        private volatile boolean isRun = true;

        @Override
        public void run(SourceContext<Long> ctx) throws Exception {
            while (isRun) {
                ctx.collect(count++);
                Thread.sleep(1000);
            }
        }

        @Override
        public void cancel() {
            isRun = false;
        }
    }
}
Split: splits one stream into multiple streams according to some rule
Select: used together with split to pick out one (or more) of the split streams
Note: split/select were deprecated in later Flink releases in favor of side outputs.
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SplitStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

import java.util.ArrayList;

public class TestSplit {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<Long> text = env.addSource(new NoParallelSource());

        // Tag each element as "even" or "odd"; an element may carry several tags.
        SplitStream<Long> split = text.split(new OutputSelector<Long>() {
            @Override
            public Iterable<String> select(Long value) {
                ArrayList<String> tags = new ArrayList<>();
                if (value % 2 == 0) {
                    tags.add("even");
                } else {
                    tags.add("odd");
                }
                return tags;
            }
        });

        // select picks out the sub-stream with the given tag.
        DataStream<Long> even = split.select("even");
        even.print().setParallelism(1);

        String name = TestSplit.class.getSimpleName();
        env.execute(name);
    }

    // A non-parallel source that emits an increasing Long once per second.
    public static class NoParallelSource implements SourceFunction<Long> {
        private long count = 1;
        private volatile boolean isRun = true;

        @Override
        public void run(SourceContext<Long> ctx) throws Exception {
            while (isRun) {
                ctx.collect(count++);
                Thread.sleep(1000);
            }
        }

        @Override
        public void cancel() {
            isRun = false;
        }
    }
}