Flink Stream Processing API

environment --> source --> transform --> sink

Step 1: create the execution environment.
getExecutionEnvironment determines the right environment automatically (a local environment when run standalone, a cluster environment when submitted to a cluster):

// batch environment
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// streaming environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

Parallelism can be configured on env; if it is not set, the value in flink-conf.yaml applies, which defaults to 1.
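For example (4 is an arbitrary illustrative value, overriding the flink-conf.yaml default):

env.setParallelism(4);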

createLocalEnvironment
Returns a local execution environment; the default parallelism has to be specified at call time:

LocalStreamEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(1);

createRemoteEnvironment
Returns a cluster execution environment. It submits the jar to a remote server; the JobManager's host and port must be specified at call time, along with the jar to run on the cluster:

StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("jobmanager-hostname", 6123, "/Users/code/testDemo/target/demo-0.0.1-SNAPSHOT.jar");

2.1 Source
2.1.1 Reading from a collection:

public class SourceTest1_Collection {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // 1. Source: read from a collection (note the L suffix: the timestamps must be long values)
        DataStream<SensorReading> dataStream = env.fromCollection(Arrays.asList(
                new SensorReading("sensor_1", 188888888L, 35.0),
                new SensorReading("sensor_1", 177777777L, 35.0),
                new SensorReading("sensor_1", 166666666L, 35.0),
                new SensorReading("sensor_1", 155555555L, 35.0)
        ));
        // or, from individual elements:
        DataStream<Integer> integerDataStream = env.fromElements(1, 2, 0, 67, 189);
        dataStream.print("data");
        integerDataStream.print("int");
        // execute the job
        env.execute();
    }
}
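The SensorReading POJO used throughout these examples is not shown in the original; a minimal sketch inferred from how it is used below (keyBy("id") requires a public class with a no-arg constructor and public getters/setters):

public class SensorReading {
    private String id;
    private Long timestamp;
    private Double temperature;

    // no-arg constructor required for Flink to treat this as a POJO
    public SensorReading() {}

    public SensorReading(String id, Long timestamp, Double temperature) {
        this.id = id;
        this.timestamp = timestamp;
        this.temperature = temperature;
    }

    public String getId() { return id; }
    public void setId(String id) { this.id = id; }
    public Long getTimestamp() { return timestamp; }
    public void setTimestamp(Long timestamp) { this.timestamp = timestamp; }
    public Double getTemperature() { return temperature; }
    public void setTemperature(Double temperature) { this.temperature = temperature; }

    @Override
    public String toString() {
        return "SensorReading{id='" + id + "', timestamp=" + timestamp + ", temperature=" + temperature + "}";
    }
}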

2.1.2 Reading from a file

public class SourceTest2_File {
    public static void main(String[] args) throws Exception{
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // read data from a file
        DataStream<String> dataStream = env.readTextFile("/Users/code/testDemo/src/main/resources/sensor.txt");
        // print
        dataStream.print();
        env.execute();
    }
}
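sensor.txt is assumed to contain one comma-separated record per line, in the field order id,timestamp,temperature that the map functions below rely on, e.g.:

sensor_1,1547718199,35.8
sensor_6,1547718201,15.4
sensor_7,1547718202,6.7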

2.1.3 Reading from Kafka
Add the dependency:

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka-0.11_2.12</artifactId>
    <version>1.10.0</version>
</dependency>

public static void main(String[] args) throws Exception{
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "localhost:9092");
        // the following properties are optional
//        properties.setProperty("group.id", "consumer-group");
//        properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
//        properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
//        properties.setProperty("auto.offset.reset", "latest");

        DataStream<String> dataStream = env.addSource(new FlinkKafkaConsumer011<String>("sensor", new SimpleStringSchema(), properties));


        // print
        dataStream.print();
        env.execute();
    }
    

2.1.4 Custom source: simulated data

Besides the built-in sources above, you can also define your own source; all that is needed is to pass in a SourceFunction. It is used as follows:

public class SourceTest4_UDF {
    public static void main(String[] args) throws Exception{
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // read from the custom source
        DataStream<SensorReading> dataStream = env.addSource(new MySensorSource());
        // print
        dataStream.print();
        env.execute();
    }
    // custom SourceFunction implementation
    private static class MySensorSource implements SourceFunction<SensorReading> {
        // flag used to control data generation
        private boolean running = true;
        @Override
        public void run(SourceContext<SensorReading> sourceContext) throws Exception {
            // random number generator
            Random random = new Random();
            // set initial temperatures for 10 sensors
            HashMap<String, Double> sensorTempMap = new HashMap<>();
            for (int i = 0; i < 10; i++) {
                // nextGaussian falls mostly within (-3, 3), so initial temps are roughly 60 +- 60
                sensorTempMap.put("sensor_" + (i + 1), 60 + random.nextGaussian() * 20);
            }
            }
            while (running) {
                for (String sensorId : sensorTempMap.keySet()) {
                    // random fluctuation around the current temperature
                    Double newTemp = sensorTempMap.get(sensorId) + random.nextGaussian();
                    sensorTempMap.put(sensorId, newTemp);
                    sourceContext.collect(new SensorReading(sensorId, System.currentTimeMillis(), newTemp));
                }
                // throttle the output rate
                Thread.sleep(1000L);
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    }
}

3.1 Transformation operators: Transform

public class TransformTest1_Base {

    public static void main(String[] args) throws Exception{
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
       // env.setParallelism(1);

        // read data from a file
        DataStream<String> dataStream = env.readTextFile("/Users/code/testDemo/src/main/resources/sensor.txt");

        // transformations: 1. map - convert each String to its length
        DataStream<Integer> mapStream = dataStream.map(new MapFunction<String, Integer>() {
            @Override
            public Integer map(String value) throws Exception {
                return value.length();
            }
        });
        // 2. flatMap - split each line by comma
        DataStream<String> flatMapStream = dataStream.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String value, Collector<String> out) throws Exception {
                String[] split = value.split(",");
                for (String s : split) {
                    out.collect(s);
                }
            }
        });
        // 3. filter - e.g. keep only records whose id starts with sensor_1
        DataStream<String> filterStream = dataStream.filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String value) throws Exception {

                return value.startsWith("sensor_1");
            }
        });
        // 4. print
        mapStream.print("map");
        flatMapStream.print("flatmap");
        filterStream.print("filter");

        env.execute();
    }

}

KeyBy and rolling aggregation
DataStream --> KeyedStream: logically splits a stream into disjoint partitions, each containing the elements of one key; internally implemented with hash partitioning.

public class TransformTest2_RollingAggregation {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // read data from a file
        DataStream<String> inputStream = env.readTextFile("/Users/code/testDemo/src/main/resources/sensor.txt");

        // convert to SensorReading
//        SingleOutputStreamOperator<SensorReading> dataStream = inputStream.map(new MapFunction<String, SensorReading>() {
//            @Override
//            public SensorReading map(String value) throws Exception {
//                String[] fields = value.split(",");
//                return new SensorReading(fields[0],new Long(fields[1]),new Double(fields[2]));
//            }
//        });

        SingleOutputStreamOperator<SensorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
               return new SensorReading(fields[0],new Long(fields[1]),new Double(fields[2]));
        });
        // group by sensor id
        KeyedStream<SensorReading, Tuple> keyedStream = dataStream.keyBy("id");

//        KeyedStream<SensorReading, String> keyedStream1 = dataStream.keyBy(data -> data.getId());
//        KeyedStream<SensorReading, String> keyedStream1 = dataStream.keyBy(SensorReading :: getId);

        // rolling aggregation: take the current max temperature per key
        // max("temperature") would update only the temperature field; maxBy returns the whole record holding the max
        // SingleOutputStreamOperator<SensorReading> resultStream = keyedStream.max("temperature");
        SingleOutputStreamOperator<SensorReading> resultStream = keyedStream.maxBy("temperature");

        resultStream.print();

        env.execute();
    }
}

Reduce:
KeyedStream --> DataStream: a grouped aggregation that combines the current element with the last aggregated result to produce a new value. The returned stream contains the result of every aggregation step, not just the final result.

public class TransformTest3_Reduce {
    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // read data from a file
        DataStream<String> inputStream = env.readTextFile("/Users/code/testDemo/src/main/resources/sensor.txt");

        SingleOutputStreamOperator<SensorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0],new Long(fields[1]),new Double(fields[2]));
        });
        // group by sensor id
        KeyedStream<SensorReading, Tuple> keyedStream = dataStream.keyBy("id");

        // reduce: keep the max temperature seen so far, together with the latest timestamp

        SingleOutputStreamOperator<SensorReading> reduce = keyedStream.reduce(new ReduceFunction<SensorReading>() {
            @Override
            public SensorReading reduce(SensorReading value1, SensorReading value2) throws Exception {
                return new SensorReading(value1.getId(), value2.getTimestamp(), Math.max(value1.getTemperature(), value2.getTemperature()));
            }
        });

        reduce.print();

        env.execute();
    }
}

Split and Select (deprecated)
DataStream -> SplitStream -> DataStream: obtain one or more DataStreams from a SplitStream. Requirement: split the sensor stream into two streams by temperature (high/low). A sketch of the modern side-output replacement follows the example below.

public class TransformTest4_MutipleStream {
    public static void main(String[] args) throws Exception{
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // read data from a file
        DataStream<String> inputStream = env.readTextFile("/Users/code/testDemo/src/main/resources/sensor.txt");

        SingleOutputStreamOperator<SensorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0],new Long(fields[1]),new Double(fields[2]));
        });
        // split the stream in two, using a temperature of 30 as the boundary
        SplitStream<SensorReading> splitStream = dataStream.split(new OutputSelector<SensorReading>() {
            @Override
            public Iterable<String> select(SensorReading value) {
                return (value.getTemperature() > 30 ) ? Collections.singletonList("high") :Collections.singletonList("low");
            }
        });

        DataStream<SensorReading> high = splitStream.select("high");
        DataStream<SensorReading> low = splitStream.select("low");
        DataStream<SensorReading> all = splitStream.select("high","low");

        high.print("high");
        low.print("low");
        all.print("all");
        env.execute();
    }
}
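Since split/select are deprecated, the same high/low routing is nowadays done with side outputs in a ProcessFunction. A minimal sketch under that assumption, reusing dataStream from the example above (the tag name "low" and the 30-degree threshold are just illustrative):

        // requires org.apache.flink.util.OutputTag, org.apache.flink.util.Collector
        // and org.apache.flink.streaming.api.functions.ProcessFunction
        OutputTag<SensorReading> lowTag = new OutputTag<SensorReading>("low") {};

        SingleOutputStreamOperator<SensorReading> highStream = dataStream
                .process(new ProcessFunction<SensorReading, SensorReading>() {
                    @Override
                    public void processElement(SensorReading value, Context ctx, Collector<SensorReading> out) throws Exception {
                        if (value.getTemperature() > 30) {
                            out.collect(value);        // main output: high temperatures
                        } else {
                            ctx.output(lowTag, value); // side output: low temperatures
                        }
                    }
                });

        DataStream<SensorReading> lowStream = highStream.getSideOutput(lowTag);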

Connect and CoMap: limitation - only two streams can be connected, but their element types may differ.
DataStream, DataStream -> ConnectedStreams: connects two data streams while preserving their types. After connect, the two streams are merely placed in the same stream; internally each keeps its own data and form unchanged, and the two remain independent.

public class TransformTest5_Connect {
    public static void main(String[] args) throws Exception{
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // read data from a file
        DataStream<String> inputStream = env.readTextFile("/Users/code/testDemo/src/main/resources/sensor.txt");

        SingleOutputStreamOperator<SensorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0],new Long(fields[1]),new Double(fields[2]));
        });
        // split the stream in two, using a temperature of 30 as the boundary
        SplitStream<SensorReading> splitStream = dataStream.split(new OutputSelector<SensorReading>() {
            @Override
            public Iterable<String> select(SensorReading value) {
                return (value.getTemperature() > 30 ) ? Collections.singletonList("high") :Collections.singletonList("low");
            }
        });

        DataStream<SensorReading> high = splitStream.select("high");
        DataStream<SensorReading> low = splitStream.select("low");
        DataStream<SensorReading> all = splitStream.select("high","low");

        // merge: map the high-temperature stream to 2-tuples, connect it with the low-temperature stream, and emit status info
        DataStream<Tuple2<String, Double>> warningStream = high.map(new MapFunction<SensorReading, Tuple2<String, Double>>() {
            @Override
            public Tuple2<String, Double> map(SensorReading value) throws Exception {
                return new Tuple2<>(value.getId(), value.getTemperature());
            }
        });

        ConnectedStreams<Tuple2<String, Double>, SensorReading> connectStream = warningStream.connect(low);
        DataStream<Object> mapStream = connectStream.map(new CoMapFunction<Tuple2<String, Double>, SensorReading, Object>() {
            @Override
            public Object map1(Tuple2<String, Double> value) throws Exception {
                return new Tuple3<>(value.f0, value.f1, "high temp warning");
            }

            @Override
            public Object map2(SensorReading value) throws Exception {
                return new Tuple2<>(value.getId(), "normal");
            }
        });
        mapStream.print();
        env.execute();
    }
}

Union: limitation - the element types must be the same.
DataStream -> DataStream: unions two or more DataStreams, producing a new DataStream containing all elements from all of them.

public static void main(String[] args) throws Exception{
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // read data from a file
        DataStream<String> inputStream = env.readTextFile("/Users/code/testDemo/src/main/resources/sensor.txt");

        SingleOutputStreamOperator<SensorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0],new Long(fields[1]),new Double(fields[2]));
        });
        // split the stream in two, using a temperature of 30 as the boundary
        SplitStream<SensorReading> splitStream = dataStream.split(new OutputSelector<SensorReading>() {
            @Override
            public Iterable<String> select(SensorReading value) {
                return (value.getTemperature() > 30 ) ? Collections.singletonList("high") :Collections.singletonList("low");
            }
        });

        DataStream<SensorReading> high = splitStream.select("high");
        DataStream<SensorReading> low = splitStream.select("low");
        DataStream<SensorReading> all = splitStream.select("high","low");
        
        // 3. union multiple streams
        DataStream<SensorReading> union = high.union(low, all);
        union.print();
        env.execute();
    }

Flink supports all of the basic Java data types.
Implementing UDF function classes for finer-grained control of the stream: define your own class that implements the corresponding function interface.

Rich Functions
A "rich function" is a function-class interface provided by the DataStream API; every Flink function class has a Rich version. Unlike a regular function, it can access the runtime context and has lifecycle methods, so it can implement more complex functionality:
RichMapFunction
RichFlatMapFunction
RichFilterFunction
open() is the initialization method of a rich function
close() is the teardown method

public class TransformTest6_RichFunction {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // read data from a file
        DataStream<String> inputStream = env.readTextFile("/Users/code/testDemo/src/main/resources/sensor.txt");

        SingleOutputStreamOperator<SensorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0],new Long(fields[1]),new Double(fields[2]));
        });
        DataStream<Tuple2<String, Integer>> resultStream = dataStream.map(new Mymapper());
        resultStream.print();
        env.execute();
    }

    // plain MapFunction version, for comparison
    private static class Mymapper0 implements MapFunction<SensorReading, Tuple2<String, Integer>> {
        @Override
        public Tuple2<String, Integer> map(SensorReading value) throws Exception {
            return new Tuple2<>(value.getId(),value.getId().length());
        }
    }

    // custom rich function implementation
    private static class Mymapper extends RichMapFunction<SensorReading,Tuple2<String,Integer>> {
        @Override
        public Tuple2<String, Integer> map(SensorReading value) throws Exception {

            return new Tuple2<>(value.getId(),getRuntimeContext().getIndexOfThisSubtask());
        }

        @Override
        public void open(Configuration parameters) throws Exception {
            // initialization: typically define state or open a database connection here
            System.out.println("open()");

        }

        @Override
        public void close() throws Exception {
            // teardown: typically close connections and clear state here
            System.out.println("close()");
        }
    }
}
Repartitioning: shuffle, keyBy, global

public static void main(String[] args) throws Exception{
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //env.setParallelism(1);
        // read data from a file
        DataStream<String> inputStream = env.readTextFile("/Users/code/testDemo/src/main/resources/sensor.txt");
        DataStream<SensorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0],new Long(fields[1]),new Double(fields[2]));
        });
        inputStream.print("input");

        // 1. shuffle: random repartitioning
        DataStream<String> shuffleStream = inputStream.shuffle();
        shuffleStream.print("shuffle");
        // 2. keyBy: partition by key hash
        dataStream.keyBy("id").print("keyBy");
        // 3. global: send all elements to the first partition
        dataStream.global().print("global");
        env.execute();
    }

Sink
Flink has no Spark-style foreach method that lets users iterate over elements freely; all output to external systems has to go through a Sink, and the final output step of a job looks like:
stream.addSink(new MySink(xxx))

Kafka sink: with a Kafka broker set up and running, it can be used directly.

public class SinkTest1_kafka {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // read data from a file
        DataStream<String> inputStream = env.readTextFile("/Users/code/testDemo/src/main/resources/sensor.txt");
        DataStream<String> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0],new Long(fields[1]),new Double(fields[2])).toString();
        });

        dataStream.addSink(new FlinkKafkaProducer011<String>("localhost:9092", "sinktest", new SimpleStringSchema()));

        env.execute();
    }
}

Using Redis

<dependency>
    <groupId>org.apache.bahir</groupId>
    <artifactId>flink-connector-redis_2.11</artifactId>
    <version>1.0</version>
</dependency>
public class SinkTest1_redis {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // read data from a file
        DataStream<String> inputStream = env.readTextFile("/Users/code/testDemo/src/main/resources/sensor.txt");
        DataStream<SensorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0],new Long(fields[1]),new Double(fields[2]));
        });
        // jedis connection config
        FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder()
                .setHost("localhost")
                .setPort(6379)
                .build();

        dataStream.addSink(new RedisSink<>(config, new RedisMapper<SensorReading>() {
            // redis command for saving the data: store as a hash, i.e. hset sensor_temp id temperature
            @Override
            public RedisCommandDescription getCommandDescription() {
                return new RedisCommandDescription(RedisCommand.HSET,"sensor_temp");
            }

            @Override
            public String getKeyFromData(SensorReading data) {
                return data.getId();
            }

            @Override
            public String getValueFromData(SensorReading data) {
                return data.getTemperature().toString();
            }
        }));
        
        env.execute();
    }
}

Elasticsearch

<dependency>
   <groupId>org.apache.flink</groupId>
   <artifactId>flink-connector-elasticsearch6_2.12</artifactId>
   <version>1.10.1</version>
</dependency>

public class SinkTest3_es {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // read data from a file
        DataStream<String> inputStream = env.readTextFile("/Users/code/testDemo/src/main/resources/sensor.txt");
        DataStream<SensorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0],new Long(fields[1]),new Double(fields[2]));
        });
        // es cluster hosts
        ArrayList<HttpHost> httpHosts = new ArrayList<>();
        httpHosts.add(new HttpHost("localhost", 9200));

        dataStream.addSink(new ElasticsearchSink.Builder<SensorReading>(httpHosts, new MyEsSinkFunction()).build());
        env.execute();
    }
    // custom ES write logic
    private static class MyEsSinkFunction implements ElasticsearchSinkFunction<SensorReading> {
        @Override
        public void process(SensorReading element, RuntimeContext ctx, RequestIndexer indexer) {
            // the document source to write
            HashMap<String, String> dataSource = new HashMap<>();
            dataSource.put("id",element.getId());
            dataSource.put("tmp",element.getTemperature().toString());
            dataSource.put("ts",element.getTimestamp().toString());

            // build the index request, i.e. the write command sent to es
            IndexRequest request = Requests.indexRequest()
                    .index("sensor")
                    .type("readingdata")
                    .source(dataSource);
            // hand the request to the indexer
            indexer.add(request);

        }
    }
}

Jdbc
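The example below writes to MySQL, so the JDBC driver must be on the classpath; a hypothetical Maven dependency (the artifact is the standard MySQL driver, the version is an assumption):

<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.44</version>
</dependency>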

public class SinkTest4_Jdbc {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // read data from a file
        DataStream<String> inputStream = env.readTextFile("/Users/code/testDemo/src/main/resources/sensor.txt");
        DataStream<SensorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorReading(fields[0],new Long(fields[1]),new Double(fields[2]));
        });

        dataStream.addSink(new MyJdbcSink());

        env.execute();
    }
    // custom JDBC sink, implemented as a RichSinkFunction
    private static class MyJdbcSink extends RichSinkFunction<SensorReading> {
       Connection connection = null;
       PreparedStatement insert = null;
       PreparedStatement update = null;

        @Override
        public void open(Configuration parameters) throws Exception {
            connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/test","root","root");
            insert = connection.prepareStatement("insert into sensor_temp(id, temp) values(?, ?)");
            update = connection.prepareStatement("update sensor_temp set temp = ? where id = ?");
        }
        // for every record, execute sql over the open connection
        @Override
        public void invoke(SensorReading value, Context context) throws Exception {
            // try the update first; if no row was affected, insert instead
            update.setDouble(1,value.getTemperature());
            update.setString(2,value.getId());
            update.execute();
            if (update.getUpdateCount() == 0) {
                insert.setString(1,value.getId());
                insert.setDouble(2,value.getTemperature());
                insert.execute();
            }
        }

        @Override
        public void close() throws Exception {
            insert.close();
            update.close();
            connection.close();
        }
    }

}