Flink Basics, Lesson 2: Reading Various Data Sources with the Flink DataStream API

1. Reading from Kafka

First, add the Kafka connector dependency (the consumer code follows it):

<!-- kafka connector: 1.10.1 is the connector version (same as Flink); 0.11 is the Kafka version; 2.11 is the Scala version -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
    <version>1.10.1</version>
</dependency>
package com.atguigu.Adatastream_api.source;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;


import java.util.Properties;

/**
 * Requires the kafka-connector dependency above.
 * At runtime, start zkServer first, then the Kafka broker, and finally a Kafka
 * console producer; records produced there are consumed here:
 * zkServer.sh start
 * kafka-server-start.sh config/server.properties &
 * kafka-console-producer.sh --broker-list Linux001:9092 --topic t001    (this blocks)
 *
 * To read a Kafka topic from the beginning, two conditions must both hold
 * (see the sketch after this class):
 *      1. The consumer group must be new, i.e. it has never committed any offsets.
 *      2. The consumer property auto.offset.reset must be set to earliest.
 */
public class CKafkaSource {
    public static void main(String[] args) throws Exception {
        //create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        /*
         * A Kafka record carries several fields; the actual payload is in the
         * value field, while the key determines which partition a record goes to.
         */
        Properties ps = new Properties();
        ps.setProperty("bootstrap.servers", "localhost:9092");//broker addresses
        ps.setProperty("group.id", "consumer-group");//consumer group
        ps.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");//key deserializer
        ps.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");//value deserializer
        ps.setProperty("auto.offset.reset", "latest");//offset reset strategy
        //The second constructor argument already supplies a DeserializationSchema,
        //so the key/value deserializer properties above are not strictly required.
        DataStream<String> result = env.addSource(new FlinkKafkaConsumer011<String>("sensor", new SimpleStringSchema(), ps));

        //print and execute
        result.print("kafka data");
        env.execute("Consume Kafka Data");

    }
}
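
The Javadoc above lists the two conditions for reading a topic from the beginning. Below is a minimal sketch of that setup; the group id "fresh-consumer-group" is hypothetical and assumed to have never committed offsets. The Flink connector also offers setStartFromEarliest(), which starts from the earliest offset regardless of committed offsets:

//Sketch: consuming the "sensor" topic from the beginning (assumes the
//environment and imports from the example above)
Properties ps = new Properties();
ps.setProperty("bootstrap.servers", "localhost:9092");
ps.setProperty("group.id", "fresh-consumer-group");//hypothetical group with no committed offsets
ps.setProperty("auto.offset.reset", "earliest");//only takes effect while the group has no committed offsets

FlinkKafkaConsumer011<String> consumer =
        new FlinkKafkaConsumer011<>("sensor", new SimpleStringSchema(), ps);
//Alternative: let the connector ignore committed offsets entirely
consumer.setStartFromEarliest();

DataStream<String> fromBeginning = env.addSource(consumer);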

2. Reading from a Collection

package com.atguigu.Adatastream_api.source;

import com.atguigu.Fbeans.SensorReading;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.Arrays;

public class BReadFromCollection {
    public static void main(String[] args) throws Exception {
        //create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        /*
         * Read data from an in-memory collection
         */
        DataStreamSource<SensorReading> inputStream1 = env.fromCollection(Arrays.asList(
                new SensorReading("sensor_1", 1547718199L, 35.8),
                new SensorReading("sensor_6", 1547718201L, 15.4),
                new SensorReading("sensor_1", 1547718202L, 16.7),
                new SensorReading("sensor_1", 1547718205L, 38.3)
        ));
        DataStream<Integer> inputStream2 = env.fromElements(1, 2, 4, 67, 189);

        //print the results
        inputStream1.print("inputStream1");
        inputStream2.print("inputStream2");

        env.execute("Read from Collection");
    }
}
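
Both examples rely on the com.atguigu.Fbeans.SensorReading POJO, which the original post never shows. A minimal sketch, with field names (id, timestamp, temperature) inferred from the constructor calls:

package com.atguigu.Fbeans;

//Minimal sketch of the POJO used throughout these examples; the field names
//are assumptions inferred from the constructor calls.
public class SensorReading {
    private String id;
    private Long timestamp;//event time in milliseconds
    private Double temperature;

    public SensorReading() {}//no-arg constructor, required for Flink POJO types

    public SensorReading(String id, Long timestamp, Double temperature) {
        this.id = id;
        this.timestamp = timestamp;
        this.temperature = temperature;
    }

    public String getId() { return id; }
    public void setId(String id) { this.id = id; }
    public Long getTimestamp() { return timestamp; }
    public void setTimestamp(Long timestamp) { this.timestamp = timestamp; }
    public Double getTemperature() { return temperature; }
    public void setTemperature(Double temperature) { this.temperature = temperature; }

    @Override
    public String toString() {
        return "SensorReading{id='" + id + "', timestamp=" + timestamp + ", temperature=" + temperature + "}";
    }
}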

3. Reading from a Custom Source

package com.atguigu.Adatastream_api.source;


import com.atguigu.Fbeans.SensorReading;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;

import java.util.HashMap;
import java.util.Random;


/**
 * User-defined data source
 */
public class DUserdefinedSource {
    public static void main(String[] args) throws Exception {
        //create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        /*
            Read data from a user-defined source
         */
        DataStreamSource<SensorReading> result = env.addSource(new RichParallelSourceFunction<SensorReading>() {
            private volatile boolean running = true;//flag that controls data generation; volatile because cancel() is called from another thread

            //generate data
            @Override
            public void run(SourceContext<SensorReading> sct) throws Exception {
                HashMap<String, Double> map = new HashMap<>();
                Random random = new Random();

                for (int i = 0; i < 10; i++) {//baseline temperature for 10 sensors
                    map.put("sensor_" + (i + 1), 60 + random.nextGaussian() * 20);
                }

                while (running) {//let the 10 temperatures fluctuate and emit readings with event timestamps
                    for (String sensor : map.keySet()) {
                        map.put(sensor, map.get(sensor) + random.nextGaussian() * 3);
                        sct.collect(new SensorReading(sensor, System.currentTimeMillis(), map.get(sensor)));
                    }
                    Thread.sleep(1000L);//throttle inside the loop to one batch per second
                }
            }

            //stop data generation
            @Override
            public void cancel() {
                running = false;
            }
        });
        //transformations would go here

        //print the results
        result.print("custom source");
        env.execute("Custom Source");
    }
}
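
Because the source above extends RichParallelSourceFunction, it can run with a parallelism greater than one, in which case every parallel subtask executes run() independently. Below is a minimal sketch of a named variant (the class name ParallelSensorSource is hypothetical) that uses the runtime context to tag each reading with its subtask index, so output from the parallel copies can be told apart:

package com.atguigu.Adatastream_api.source;

import com.atguigu.Fbeans.SensorReading;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;

import java.util.Random;

//Hypothetical named variant of the anonymous source above
public class ParallelSensorSource extends RichParallelSourceFunction<SensorReading> {
    private volatile boolean running = true;

    @Override
    public void run(SourceContext<SensorReading> sct) throws Exception {
        //each parallel instance emits under its own sensor id
        int subtask = getRuntimeContext().getIndexOfThisSubtask();
        Random random = new Random();
        while (running) {
            sct.collect(new SensorReading(
                    "sensor_" + subtask, System.currentTimeMillis(),
                    60 + random.nextGaussian() * 20));
            Thread.sleep(1000L);
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}

Usage would then look like env.addSource(new ParallelSensorSource()).setParallelism(4), which starts four independent generators.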
