Flink Streaming API

The streaming API consists of four parts: environment (creating the execution environment), source (acquiring the data source), transform (transformation operators implementing the processing logic), and sink (emitting the output).

Environment

getExecutionEnvironment

Creates an execution environment that represents the context in which the current program runs. If the program is invoked standalone, this method returns a local execution environment; if it is invoked from the command-line client to be submitted to a cluster, this method returns that cluster's execution environment.

In short: it decides which kind of environment to return based on how the program is run, and it is the most commonly used way to create an execution environment.

// Create a batch execution environment
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// Create a streaming execution environment
StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();

If no parallelism is set, the value configured in flink-conf.yaml applies; the default is 1.
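
For example, the config-file default can be overridden per job with setParallelism (a minimal sketch; the value 4 is arbitrary):

// Create the environment, then override the flink-conf.yaml default for this job
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(4);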

createLocalEnvironment

Returns a local execution environment; the parallelism to use is specified in the call.

// Create a local execution environment with parallelism 1
LocalStreamEnvironment localEnv = StreamExecutionEnvironment.createLocalEnvironment(1);

createRemoteEnvironment

Returns a cluster execution environment and submits the jar to a remote server; the host, port, and jar path must be specified.

// Create a remote execution environment: host, port, and path to the jar
StreamExecutionEnvironment remoteEnv = StreamExecutionEnvironment.createRemoteEnvironment("ha1", 6123, "/opt/a.jar");

Source

fromCollection

Reads data from a collection.

package com.ts.flink;

// POJO for a sensor temperature reading
public class SensorReading {
    // Fields: sensor id, timestamp, temperature
    private String id;
    private Long timestamp;
    private Double temperature;

    public SensorReading() {
    }

    public SensorReading(String id, Long timestamp, Double temperature) {
        this.id = id;
        this.timestamp = timestamp;
        this.temperature = temperature;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public Long getTimestamp() {
        return timestamp;
    }

    public void setTimestamp(Long timestamp) {
        this.timestamp = timestamp;
    }

    public Double getTemperature() {
        return temperature;
    }

    public void setTemperature(Double temperature) {
        this.temperature = temperature;
    }

    @Override
    public String toString() {
        return "SensorReading{" +
                "id='" + id + '\'' +
                ", timestamp=" + timestamp +
                ", temperature=" + temperature +
                '}';
    }
}

----------

package com.ts.flink;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.Arrays;

public class SourceTest {
    public static void main(String[] args) throws Exception{
        // Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read data from a collection
        DataStream<SensorReading> dataStream = env.fromCollection(Arrays.asList(
                new SensorReading("sensor_1", 1547718199L, 35.8),
                new SensorReading("sensor_6", 1547718201L, 15.4),
                new SensorReading("sensor_7", 1547718202L, 6.7),
                new SensorReading("sensor_10", 1547718205L, 38.1)
        ));

        DataStream<Integer> integerDataStream = env.fromElements(1, 2, 4, 67, 189);

        // Print the results
        dataStream.print("data");
        integerDataStream.print("int");

        // Execute the job
        env.execute();
    }
}

----------
int> 1
int> 2
int> 4
int> 67
int> 189
data> SensorReading{id='sensor_1', timestamp=1547718199, temperature=35.8}
data> SensorReading{id='sensor_6', timestamp=1547718201, temperature=15.4}
data> SensorReading{id='sensor_7', timestamp=1547718202, temperature=6.7}
data> SensorReading{id='sensor_10', timestamp=1547718205, temperature=38.1}

readTextFile

Reads data from a file.

package com.ts.flink;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SourceTest {
    public static void main(String[] args) throws Exception{
        // Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read data from a file
        DataStream<String> dataStream = env.readTextFile("..\\resources\\sensor.txt");

        // Print the results
        dataStream.print();

        // Execute the job
        env.execute();
    }
}

----------
sensor_1,1547718199,35.8
sensor_6,1547718201,15.4
sensor_7,1547718202,6.7
sensor_10,1547718205,38.1
sensor_1,1547718207,36.3
sensor_1,1547718209,32.8
sensor_1,1547718212,37.1

FlinkKafkaConsumer

Reads data from Kafka.

package com.ts.flink;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

import java.util.Properties;

public class SourceTest {
    public static void main(String[] args) throws Exception{
        // Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "192.168.110.110:9092");
        properties.setProperty("group.id", "consumer-group");
        properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.setProperty("auto.offset.reset", "latest");

        // Read from Kafka: topic name, deserialization schema, consumer configuration
        DataStream<String> dataStream = env.addSource( new FlinkKafkaConsumer<String>("sensor", new SimpleStringSchema(), properties));

        // Print the results
        dataStream.print();

        // Execute the job
        env.execute();
    }
}

SourceFunction

A custom data source.

package com.ts.flink;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

import java.util.HashMap;
import java.util.Random;

public class SourceTest {
    public static void main(String[] args) throws Exception{
        // Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        DataStream<SensorReading> dataStream = env.addSource( new MySensorSource() );

        // Print the results
        dataStream.print();

        env.execute();
    }

    // Custom SourceFunction implementation
    public static class MySensorSource implements SourceFunction<SensorReading> {
        // Flag that controls whether the source keeps producing data
        private boolean running = true;

        @Override
        public void run(SourceContext<SensorReading> ctx) throws Exception {
            // Random number generator
            Random random = new Random();

            // Initialize the temperatures of 10 sensors
            HashMap<String, Double> sensorTempMap = new HashMap<>();
            for( int i = 0; i < 10; i++ ){
                sensorTempMap.put("sensor_" + (i+1), 60 + random.nextGaussian() * 20);
            }

            while (running){
                for( String sensorId: sensorTempMap.keySet() ){
                    // Random-walk the temperature around its current value
                    Double newTemp = sensorTempMap.get(sensorId) + random.nextGaussian();
                    sensorTempMap.put(sensorId, newTemp);
                    ctx.collect(new SensorReading(sensorId, System.currentTimeMillis(), newTemp));
                }
                // Throttle the emission rate to once per second
                Thread.sleep(1000L);
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    }
}

----------
SensorReading{id='sensor_10', timestamp=1609393775349, temperature=61.48794806847098}
SensorReading{id='sensor_4', timestamp=1609393775349, temperature=41.17315799659017}
SensorReading{id='sensor_1', timestamp=1609393775349, temperature=60.646840931571724}
SensorReading{id='sensor_2', timestamp=1609393775349, temperature=76.26340337275492}
SensorReading{id='sensor_7', timestamp=1609393775349, temperature=71.9787630892099}
SensorReading{id='sensor_8', timestamp=1609393775349, temperature=57.1961185158635}
SensorReading{id='sensor_5', timestamp=1609393775349, temperature=49.45034930571155}
SensorReading{id='sensor_6', timestamp=1609393775349, temperature=90.86870265386753}
SensorReading{id='sensor_9', timestamp=1609393775349, temperature=56.67807942587405}

Transform

Map

One-to-one transformation.

package com.ts.flink;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class TransformTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read data from a file
        DataStream<String> inputStream = env.readTextFile("..\\resources\\sensor.txt");
        // map: convert each String into its length
        DataStream<Integer> mapStream = inputStream.map(new MapFunction<String, Integer>() {
            @Override
            public Integer map(String value) throws Exception {
                return value.length();
            }
        });

        // Print the results
        mapStream.print("map");
        env.execute();
    }
}

----------
map> 24
map> 24
map> 23
map> 25
map> 24
map> 24
map> 24

flatMap

One-to-many transformation.

package com.ts.flink;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class TransformTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read data from a file
        DataStream<String> inputStream = env.readTextFile("..\\resources\\sensor.txt");

        // flatMap: split each line into fields by comma
        DataStream<String> flatMapStream = inputStream.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String value, Collector<String> out) throws Exception {
                String[] fields = value.split(",");
                for( String field: fields )
                    out.collect(field);
            }
        });

        // Print the results
        flatMapStream.print("flatMap");

        env.execute();
    }
}

----------
flatMap> sensor_1
flatMap> 1547718199
flatMap> 35.8
flatMap> sensor_6
flatMap> 1547718201
flatMap> 15.4
flatMap> sensor_7
flatMap> 1547718202
flatMap> 6.7
flatMap> sensor_10
flatMap> 1547718205
flatMap> 38.1
flatMap> sensor_1
flatMap> 1547718207
flatMap> 36.3
flatMap> sensor_1
flatMap> 1547718209
flatMap> 32.8
flatMap> sensor_1
flatMap> 1547718212
flatMap> 37.1

filter

Filtering transformation.

package com.ts.flink;

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class TransformTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read data from a file
        DataStream<String> inputStream = env.readTextFile("..\\resources\\sensor.txt");

        // filter: keep only the records whose id is sensor_1
        DataStream<String> filterStream = inputStream.filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String value) throws Exception {
                return value.startsWith("sensor_1,");
            }
        });

        // Print the results
        filterStream.print("filter");

        env.execute();
    }
}

----------
filter> sensor_1,1547718199,35.8
filter> sensor_1,1547718207,36.3
filter> sensor_1,1547718209,32.8
filter> sensor_1,1547718212,37.1

KeyBy

Grouping transformation: partitions the stream by key.

package com.ts.flink;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class TransformTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read data from a file
        DataStream<String> inputStream = env.readTextFile("..\\resources\\sensor.txt");

        DataStream<SensorReading> dataStream = inputStream.map( line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
        } );

        KeyedStream<SensorReading, String> keyedStream = dataStream.keyBy(data -> data.getId());

        keyedStream.print();
        env.execute();
    }
}

----------
SensorReading{id='sensor_1', timestamp=1547718199, temperature=35.8}
SensorReading{id='sensor_6', timestamp=1547718201, temperature=15.4}
SensorReading{id='sensor_7', timestamp=1547718202, temperature=6.7}
SensorReading{id='sensor_10', timestamp=1547718205, temperature=38.1}
SensorReading{id='sensor_1', timestamp=1547718207, temperature=36.3}
SensorReading{id='sensor_1', timestamp=1547718209, temperature=32.8}
SensorReading{id='sensor_1', timestamp=1547718212, temperature=37.1}

Rolling Aggregation

sum, max, min, maxBy, minBy.

These are the common rolling aggregations; they must be applied to a keyed stream (after keyBy). Note in the output below that max/min update only the aggregated field and leave the other fields at their first-seen values (the timestamps never change), whereas maxBy/minBy emit the complete record that contains the extreme value.

package com.ts.flink;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class TransformTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read data from a file
        DataStream<String> inputStream = env.readTextFile("..\\resources\\sensor.txt");

        DataStream<SensorReading> dataStream = inputStream.map( line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
        } );

        KeyedStream<SensorReading, String> keyedStream = dataStream.keyBy(data -> data.getId());

        // sum
        DataStream<SensorReading> sumStream = keyedStream.sum("temperature");
        sumStream.print("sum");

        // max
        DataStream<SensorReading> maxStream = keyedStream.max("temperature");
        maxStream.print("max");

        // min
        DataStream<SensorReading> minStream = keyedStream.min("temperature");
        minStream.print("min");

        // maxBy
        DataStream<SensorReading> maxByStream = keyedStream.maxBy("temperature");
        maxByStream.print("maxBy");

        // minBy
        DataStream<SensorReading> minByStream = keyedStream.minBy("temperature");
        minByStream.print("minBy");

        env.execute();
    }
}

----------
sum> SensorReading{id='sensor_1', timestamp=1547718199, temperature=35.8}
sum> SensorReading{id='sensor_6', timestamp=1547718201, temperature=15.4}
sum> SensorReading{id='sensor_7', timestamp=1547718202, temperature=6.7}
sum> SensorReading{id='sensor_10', timestamp=1547718205, temperature=38.1}
sum> SensorReading{id='sensor_1', timestamp=1547718199, temperature=72.1}
sum> SensorReading{id='sensor_1', timestamp=1547718199, temperature=104.9}
sum> SensorReading{id='sensor_1', timestamp=1547718199, temperature=142.0}

max> SensorReading{id='sensor_1', timestamp=1547718199, temperature=35.8}
max> SensorReading{id='sensor_6', timestamp=1547718201, temperature=15.4}
max> SensorReading{id='sensor_7', timestamp=1547718202, temperature=6.7}
max> SensorReading{id='sensor_10', timestamp=1547718205, temperature=38.1}
max> SensorReading{id='sensor_1', timestamp=1547718199, temperature=36.3}
max> SensorReading{id='sensor_1', timestamp=1547718199, temperature=36.3}
max> SensorReading{id='sensor_1', timestamp=1547718199, temperature=37.1}

min> SensorReading{id='sensor_1', timestamp=1547718199, temperature=35.8}
min> SensorReading{id='sensor_6', timestamp=1547718201, temperature=15.4}
min> SensorReading{id='sensor_7', timestamp=1547718202, temperature=6.7}
min> SensorReading{id='sensor_10', timestamp=1547718205, temperature=38.1}
min> SensorReading{id='sensor_1', timestamp=1547718199, temperature=35.8}
min> SensorReading{id='sensor_1', timestamp=1547718199, temperature=32.8}
min> SensorReading{id='sensor_1', timestamp=1547718199, temperature=32.8}

maxBy> SensorReading{id='sensor_1', timestamp=1547718199, temperature=35.8}
maxBy> SensorReading{id='sensor_6', timestamp=1547718201, temperature=15.4}
maxBy> SensorReading{id='sensor_7', timestamp=1547718202, temperature=6.7}
maxBy> SensorReading{id='sensor_10', timestamp=1547718205, temperature=38.1}
maxBy> SensorReading{id='sensor_1', timestamp=1547718207, temperature=36.3}
maxBy> SensorReading{id='sensor_1', timestamp=1547718207, temperature=36.3}
maxBy> SensorReading{id='sensor_1', timestamp=1547718212, temperature=37.1}

minBy> SensorReading{id='sensor_1', timestamp=1547718199, temperature=35.8}
minBy> SensorReading{id='sensor_6', timestamp=1547718201, temperature=15.4}
minBy> SensorReading{id='sensor_7', timestamp=1547718202, temperature=6.7}
minBy> SensorReading{id='sensor_10', timestamp=1547718205, temperature=38.1}
minBy> SensorReading{id='sensor_1', timestamp=1547718199, temperature=35.8}
minBy> SensorReading{id='sensor_1', timestamp=1547718209, temperature=32.8}
minBy> SensorReading{id='sensor_1', timestamp=1547718209, temperature=32.8}

reduce

A general rolling aggregation on a keyed stream: each incoming record is combined with the previously reduced value, which allows arbitrary aggregation logic (here: the minimum temperature so far, paired with the latest timestamp).

package com.ts.flink;


import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class TransformTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read data from a file
        DataStream<String> inputStream = env.readTextFile("..\\resources\\sensor.txt");

        DataStream<SensorReading> dataStream = inputStream.map( line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
        } );

        KeyedStream<SensorReading, String> keyedStream = dataStream.keyBy(data -> data.getId());

        // reduce aggregation: keep the minimum temperature seen so far, together with the latest timestamp
        DataStream<SensorReading> reduceStream = keyedStream.reduce(new
            ReduceFunction<SensorReading>() {
                @Override
                public SensorReading reduce(SensorReading value1, SensorReading value2)
                        throws Exception {
                    return new SensorReading(
                            value1.getId(),
                            value2.getTimestamp(),
                            Math.min(value1.getTemperature(), value2.getTemperature()));
                }
            }
        );
        
        reduceStream.print("reduce");

        env.execute();
    }
}

----------

reduce> SensorReading{id='sensor_1', timestamp=1547718199, temperature=35.8}
reduce> SensorReading{id='sensor_6', timestamp=1547718201, temperature=15.4}
reduce> SensorReading{id='sensor_7', timestamp=1547718202, temperature=6.7}
reduce> SensorReading{id='sensor_10', timestamp=1547718205, temperature=38.1}
reduce> SensorReading{id='sensor_1', timestamp=1547718207, temperature=35.8}
reduce> SensorReading{id='sensor_1', timestamp=1547718209, temperature=32.8}
reduce> SensorReading{id='sensor_1', timestamp=1547718212, temperature=32.8}
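
Since ReduceFunction has a single abstract method, the same logic can also be written as a lambda (an equivalent sketch of the anonymous class above):

// Same logic: minimum temperature so far, paired with the latest timestamp
DataStream<SensorReading> reduceLambda = keyedStream.reduce((curState, newData) ->
        new SensorReading(curState.getId(), newData.getTimestamp(),
                Math.min(curState.getTemperature(), newData.getTemperature())));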

Split and Select

split divides one stream into several tagged sub-streams; select then retrieves the sub-streams by tag. (In later Flink versions split/select was deprecated in favor of side outputs.)

package com.ts.flink;

import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SplitStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.Collections;

public class TransformTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read data from a file
        DataStream<String> inputStream = env.readTextFile("..\\resources\\sensor.txt");

        // Parse into SensorReading
        DataStream<SensorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
        } );

        // Split the stream into two, using 30 degrees as the temperature boundary
        SplitStream<SensorReading> splitStream = dataStream.split(new OutputSelector<SensorReading>() {
            @Override
            public Iterable<String> select(SensorReading value) {
                return (value.getTemperature() > 30) ? Collections.singletonList("high") : Collections.singletonList("low");
            }
        });
        
        // Select the individual sub-streams by tag
        DataStream<SensorReading> highTempStream = splitStream.select("high");
        DataStream<SensorReading> lowTempStream = splitStream.select("low");
        DataStream<SensorReading> allTempStream = splitStream.select("high", "low");

        highTempStream.print("high");
        lowTempStream.print("low");
        allTempStream.print("all");

        env.execute();
    }
}

----------
    
high> SensorReading{id='sensor_1', timestamp=1547718199, temperature=35.8}
all> SensorReading{id='sensor_1', timestamp=1547718199, temperature=35.8}
all> SensorReading{id='sensor_6', timestamp=1547718201, temperature=15.4}
low> SensorReading{id='sensor_6', timestamp=1547718201, temperature=15.4}
all> SensorReading{id='sensor_7', timestamp=1547718202, temperature=6.7}
low> SensorReading{id='sensor_7', timestamp=1547718202, temperature=6.7}
high> SensorReading{id='sensor_10', timestamp=1547718205, temperature=38.1}
all> SensorReading{id='sensor_10', timestamp=1547718205, temperature=38.1}
high> SensorReading{id='sensor_1', timestamp=1547718207, temperature=36.3}
all> SensorReading{id='sensor_1', timestamp=1547718207, temperature=36.3}
high> SensorReading{id='sensor_1', timestamp=1547718209, temperature=32.8}
all> SensorReading{id='sensor_1', timestamp=1547718209, temperature=32.8}
high> SensorReading{id='sensor_1', timestamp=1547718212, temperature=37.1}
all> SensorReading{id='sensor_1', timestamp=1547718212, temperature=37.1}

Connect and CoMap

connect combines two streams whose element types may differ into a ConnectedStreams; a CoMapFunction then applies a separate map function to each of the two streams.

package com.ts.flink;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.ConnectedStreams;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SplitStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoMapFunction;

import java.util.Collections;

public class TransformTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read data from a file
        DataStream<String> inputStream = env.readTextFile("..\\resources\\sensor.txt");

        // Parse into SensorReading
        DataStream<SensorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
        } );

        // Split the stream into two, using 30 degrees as the temperature boundary
        SplitStream<SensorReading> splitStream = dataStream.split(new OutputSelector<SensorReading>() {
            @Override
            public Iterable<String> select(SensorReading value) {
                return (value.getTemperature() > 30) ? Collections.singletonList("high") : Collections.singletonList("low");
            }
        });

        DataStream<SensorReading> highTempStream = splitStream.select("high");
        DataStream<SensorReading> lowTempStream = splitStream.select("low");


        // connect: map the high-temperature stream to tuples, connect it with the low-temperature stream, then emit status info
        DataStream<Tuple2<String, Double>> warningStream = highTempStream.map(new MapFunction<SensorReading, Tuple2<String, Double>>() {
            @Override
            public Tuple2<String, Double> map(SensorReading value) throws Exception {
                return new Tuple2<>(value.getId(), value.getTemperature());
            }
        });

        ConnectedStreams<Tuple2<String, Double>, SensorReading> connectedStreams = warningStream.connect(lowTempStream);

        DataStream<Object> resultStream = connectedStreams.map(new CoMapFunction<Tuple2<String, Double>, SensorReading, Object>() {
            @Override
            public Object map1(Tuple2<String, Double> value) throws Exception {
                return new Tuple3<>(value.f0, value.f1, "high temp warning");
            }

            @Override
            public Object map2(SensorReading value) throws Exception {
                return new Tuple2<>(value.getId(), "normal");
            }
        });
        warningStream.print("Connect");
        resultStream.print("CoMap");
        
        env.execute();
    }
}

----------
    
Connect> (sensor_1,35.8)
Connect> (sensor_10,38.1)
Connect> (sensor_1,36.3)
Connect> (sensor_1,32.8)
Connect> (sensor_1,37.1)
CoMap> (sensor_1,35.8,high temp warning)
CoMap> (sensor_6,normal)
CoMap> (sensor_10,38.1,high temp warning)
CoMap> (sensor_7,normal)
CoMap> (sensor_1,36.3,high temp warning)
CoMap> (sensor_1,32.8,high temp warning)
CoMap> (sensor_1,37.1,high temp warning)

Union

union merges any number of streams into a single stream; unlike connect, all inputs must have the same element type.

package com.ts.flink;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.ConnectedStreams;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SplitStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoMapFunction;

import java.util.Collections;

public class TransformTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read data from a file
        DataStream<String> inputStream = env.readTextFile("..\\resources\\sensor.txt");

        // Parse into SensorReading
        DataStream<SensorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
        } );

        // Split the stream into two, using 30 degrees as the temperature boundary
        SplitStream<SensorReading> splitStream = dataStream.split(new OutputSelector<SensorReading>() {
            @Override
            public Iterable<String> select(SensorReading value) {
                return (value.getTemperature() > 30) ? Collections.singletonList("high") : Collections.singletonList("low");
            }
        });

        DataStream<SensorReading> highTempStream = splitStream.select("high");
        DataStream<SensorReading> lowTempStream = splitStream.select("low");
        DataStream<SensorReading> allTempStream = splitStream.select("high", "low");

        // union: combine multiple streams
        DataStream<SensorReading> unionStream = highTempStream.union(lowTempStream, allTempStream);
        unionStream.print("union");
        
        env.execute();
    }
}

----------

union> SensorReading{id='sensor_1', timestamp=1547718199, temperature=35.8}
union> SensorReading{id='sensor_10', timestamp=1547718205, temperature=38.1}
union> SensorReading{id='sensor_1', timestamp=1547718207, temperature=36.3}
union> SensorReading{id='sensor_1', timestamp=1547718209, temperature=32.8}
union> SensorReading{id='sensor_1', timestamp=1547718212, temperature=37.1}
union> SensorReading{id='sensor_6', timestamp=1547718201, temperature=15.4}
union> SensorReading{id='sensor_7', timestamp=1547718202, temperature=6.7}
union> SensorReading{id='sensor_1', timestamp=1547718199, temperature=35.8}
union> SensorReading{id='sensor_6', timestamp=1547718201, temperature=15.4}
union> SensorReading{id='sensor_7', timestamp=1547718202, temperature=6.7}
union> SensorReading{id='sensor_10', timestamp=1547718205, temperature=38.1}
union> SensorReading{id='sensor_1', timestamp=1547718207, temperature=36.3}
union> SensorReading{id='sensor_1', timestamp=1547718209, temperature=32.8}
union> SensorReading{id='sensor_1', timestamp=1547718212, temperature=37.1}

Sink

Flink has no equivalent of Spark's foreach method for ad-hoc, per-record output in user code. All output to external systems must go through a sink, and a job's final output step takes a form like the following.
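
A schematic only; MySink and its constructor arguments are placeholders for a concrete SinkFunction implementation:

// MySink stands in for any concrete SinkFunction
stream.addSink(new MySink(xxxx));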

Kafka

package com.ts.flink;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

import java.util.Properties;

public class KafkaConnector {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "192.168.110.110:9092");
        properties.setProperty("group.id", "consumer-group");
        properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.setProperty("auto.offset.reset", "latest");

        // Read data from Kafka
        DataStream<String> inputStream = env.addSource( new FlinkKafkaConsumer<String>("sensor", new SimpleStringSchema(), properties));

        // Parse into SensorReading, then back to String for the Kafka producer
        DataStream<String> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2])).toString();
        });
		
        // Write to Kafka
        dataStream.addSink( new FlinkKafkaProducer<String>("192.168.110.110:9092", "sinktest", new SimpleStringSchema()));

        env.execute();
    }
}

Redis

Add the connector jar: flink-connector-redis_2.10-1.1.5.jar.
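
If the project uses Maven, the corresponding dependency would be (coordinates inferred from the jar name above):

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-redis_2.10</artifactId>
    <version>1.1.5</version>
</dependency>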

package com.ts.flink;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;

public class RedisConnector {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // Read data from a file
        DataStream<String> inputStream = env.readTextFile("..\\resources\\sensor.txt");

        // Parse into SensorReading
        DataStream<SensorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
        });

        // Jedis connection configuration
        FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder()
                .setHost("localhost")
                .setPort(6379)
                .build();

        dataStream.addSink( new RedisSink<>(config, new MyRedisMapper()));

        env.execute();
    }

    // Custom RedisMapper
    public static class MyRedisMapper implements RedisMapper<SensorReading> {
        // Define the command for saving data to Redis: store as a hash (HSET), with sensor_temp as the hash key, the sensor id as the field, and the temperature as the value
        @Override
        public RedisCommandDescription getCommandDescription() {
            return new RedisCommandDescription(RedisCommand.HSET, "sensor_temp");
        }

        @Override
        public String getKeyFromData(SensorReading data) {
            return data.getId();
        }

        @Override
        public String getValueFromData(SensorReading data) {
            return data.getTemperature().toString();
        }
    }
}

Elasticsearch

package com.ts.flink;

import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
// The sink classes below assume the flink-connector-elasticsearch6 artifact
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;

import java.util.ArrayList;
import java.util.HashMap;

public class EsConnector {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read data from a file
        DataStream<String> inputStream = env.readTextFile("..\\resources\\sensor.txt");

        // Parse into SensorReading
        DataStream<SensorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
        });

        // Elasticsearch connection configuration
        ArrayList<HttpHost> httpHosts = new ArrayList<>();
        httpHosts.add(new HttpHost("localhost", 9200));

        dataStream.addSink(new ElasticsearchSink.Builder<SensorReading>(httpHosts, new MyEsSinkFunction()).build());

        env.execute();
    }

    // Custom ES write logic
    public static class MyEsSinkFunction implements ElasticsearchSinkFunction<SensorReading>{
        @Override
        public void process(SensorReading element, RuntimeContext ctx, RequestIndexer indexer) {
            // Build the document source to write
            HashMap<String, String> dataSource = new HashMap<>();
            dataSource.put("id", element.getId());
            dataSource.put("temp", element.getTemperature().toString());
            dataSource.put("ts", element.getTimestamp().toString());

            // Build the index request to send to ES
            IndexRequest indexRequest = Requests.indexRequest()
                    .index("sensor")
                    .type("readingdata")
                    .source(dataSource);

            // Hand the request to the indexer
            indexer.add(indexRequest);
        }
    }
}

JDBC

A custom JDBC sink built on RichSinkFunction: the connection and prepared statements are created once in open() and released in close(), and invoke() runs the SQL for each record.

package com.ts.flink;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

public class JdbcConnector {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        
        DataStream<SensorReading> dataStream = env.addSource(new SourceTest.MySensorSource());

        dataStream.addSink(new MyJdbcSink());

        env.execute();
    }

    // Custom SinkFunction implementation
    public static class MyJdbcSink extends RichSinkFunction<SensorReading> {
        // Connection and prepared statements
        Connection connection = null;
        PreparedStatement insertStmt = null;
        PreparedStatement updateStmt = null;

        @Override
        public void open(Configuration parameters) throws Exception {
            connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", "root", "123456");
            insertStmt = connection.prepareStatement("insert into sensor_temp (id, temp) values (?, ?)");
            updateStmt = connection.prepareStatement("update sensor_temp set temp = ? where id = ?");
        }

        // For each record, execute the SQL over the open connection
        @Override
        public void invoke(SensorReading value, Context context) throws Exception {
            // Try the update first; if no row was updated, insert
            updateStmt.setDouble(1, value.getTemperature());
            updateStmt.setString(2, value.getId());
            updateStmt.execute();
            if( updateStmt.getUpdateCount() == 0 ){
                insertStmt.setString(1, value.getId());
                insertStmt.setDouble(2, value.getTemperature());
                insertStmt.execute();
            }
        }

        @Override
        public void close() throws Exception {
            insertStmt.close();
            updateStmt.close();
            connection.close();
        }
    }
}
