一、分流合流算子
package com.atguigu.Adatastream_api.transformations;
import com.atguigu.Fbeans.SensorReading;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.*;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoMapFunction;
import java.util.Collections;
/**
 * Demo job for Flink's stream splitting and merging operators.
 *
 * <p>The job reads comma-separated sensor records from a text file, splits them into a
 * "high" and a "low" temperature stream with {@code split}/{@code select}, and then merges
 * streams again in two ways: {@code connect} + {@code CoMapFunction} (streams of different
 * types) and {@code union} (streams of the same type). All resulting streams are printed.
 *
 * <p>Usage: an optional first program argument overrides the input file path; without it
 * the built-in default path is used.
 */
public class CManyStreamTest {

    /** Input file used when no path is passed on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "G:\\SoftwareInstall\\idea\\project\\UserBehaviorAnalysis\\BasicKnowledge\\src\\main\\resources\\sensor.txt";

    /** Readings strictly above this temperature are routed to the "high" stream. */
    private static final double HIGH_TEMPERATURE_THRESHOLD = 30;

    /** Number of comma-separated fields expected per input line: id, timestamp, temperature. */
    private static final int EXPECTED_FIELD_COUNT = 3;

    /**
     * Builds and executes the demo pipeline.
     *
     * @param args optional; {@code args[0]} is the path of the sensor text file to read
     * @throws Exception if the Flink job fails to build or execute
     */
    public static void main(String[] args) throws Exception {
        // Create the execution environment. Parallelism 1 keeps the printed demo output easy to follow.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // Read the raw lines and wrap each one in a SensorReading POJO.
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        DataStreamSource<String> inputStream = env.readTextFile(inputPath);
        DataStream<SensorReading> dataStream = inputStream.map(line -> parseSensorReading(line));

        /*
         * split tags every element with one or more names; select picks the sub-stream for a tag.
         * NOTE: split/select are deprecated and were removed in Flink 1.12. On newer versions use
         * side outputs instead (an OutputTag emitted from a ProcessFunction).
         */
        SplitStream<SensorReading> split = dataStream.split(new OutputSelector<SensorReading>() {
            @Override
            public Iterable<String> select(SensorReading sen) {
                String tag = sen.getTemperature() > HIGH_TEMPERATURE_THRESHOLD ? "high" : "low";
                return Collections.singletonList(tag);
            }
        });
        DataStream<SensorReading> highStream = split.select("high");
        DataStream<SensorReading> lowStream = split.select("low");

        /*
         * Two operators merge streams: connect and union.
         * connect: joins exactly two streams whose element types may differ. The two inputs stay
         *          independent inside the connected stream until a CoMap/CoFlatMap function maps
         *          both sides to one common output type (Object if nothing more specific fits).
         * union:   merges any number of streams that share the same element type.
         */
        // To exercise connect, first convert highStream into a type different from lowStream.
        SingleOutputStreamOperator<Tuple2<String, Double>> newHighStream =
                highStream.map(new RichMapFunction<SensorReading, Tuple2<String, Double>>() {
                    @Override
                    public Tuple2<String, Double> map(SensorReading data) throws Exception {
                        return new Tuple2<>(data.getId(), data.getTemperature());
                    }
                });

        // The two type parameters are the element types of the two connected inputs.
        ConnectedStreams<Tuple2<String, Double>, SensorReading> connectedStream =
                newHighStream.connect(lowStream);

        // CoMapFunction handles each input separately; its third type parameter is the merged output type.
        DataStream<Object> result1 = connectedStream.map(
                new CoMapFunction<Tuple2<String, Double>, SensorReading, Object>() {
                    @Override
                    public Object map1(Tuple2<String, Double> tuple) throws Exception {
                        return new Tuple3<>(tuple.f0, tuple.f1, "warning:温度太高了!!");
                    }

                    @Override
                    public Object map2(SensorReading sen) throws Exception {
                        return new Tuple3<>(sen.getId(), sen.getTemperature(), sen.getTimestamp());
                    }
                });

        // union accepts several streams of the same type. Each stream is deliberately passed twice
        // here, so every element appears twice in result2.
        DataStream<SensorReading> result2 = highStream.union(lowStream, lowStream, highStream);

        // Print every stream with a distinguishing label.
        highStream.print("这是高温流!!!");
        lowStream.print("这是低温流!!!");
        result1.print("测试connect结合map中的coMap合并两条流。");
        result2.print("测试union合并多条数据类型一致的流");

        env.execute("合流分流算子测试");
    }

    /**
     * Parses one {@code id,timestamp,temperature} line into a {@link SensorReading}.
     * Surrounding whitespace of each field is ignored.
     *
     * @param line one raw line of the input file
     * @return the parsed reading
     * @throws IllegalArgumentException if the line has fewer than three comma-separated fields
     * @throws NumberFormatException if the timestamp or temperature field is not numeric
     */
    private static SensorReading parseSensorReading(String line) {
        String[] fields = line.split(",");
        if (fields.length < EXPECTED_FIELD_COUNT) {
            // Fail with the offending line instead of an opaque ArrayIndexOutOfBoundsException.
            throw new IllegalArgumentException(
                    "Expected id,timestamp,temperature but got: \"" + line + "\"");
        }
        return new SensorReading(
                fields[0].trim(),
                Long.valueOf(fields[1].trim()),
                Double.valueOf(fields[2].trim()));
    }
}
二、重分区算子
1、shuffle
将数据随机(均匀分布)发往下游的某一个subtask
2、rebalance
以轮询方式均分到下游每一个subtask
3、rescale
局部轮询:每个上游subtask只向下游的一部分subtask轮询发送数据。例如上游2个分区、下游4个分区时,上游每个分区的数据只轮询发往与其对应的2个下游分区
4、broadcast
将数据广播给下游每一个subtask
5、global
所有数据发往下游第一个subtask
6、forward
数据一对一直接发往下游对应的subtask,不做重分区(要求上下游并行度相同)
7、partitionCustom
用户自定义分区方式