Learning Big Data with Flink - 06: Data Sources

As a stream-processing framework, Flink can be used for batch processing, i.e. working on static or historical data sets, and for stream processing, i.e. processing live data streams and producing results in real time; as long as data keeps arriving, Flink keeps computing. A Data Source is simply where that data comes from.

I. Collection-based sources

1. fromCollection(Collection)

Creates a data stream from a Collection. All elements in the collection must be of the same type.

  1. Scala code

    package com.hjf.dataSource
    
    import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
    object DataSourceForCollection {
      def main(args: Array[String]): Unit = {
          val streamEnv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
          streamEnv.setParallelism(1)
          import org.apache.flink.streaming.api.scala._
          val stream: DataStream[String] = streamEnv.fromCollection(Array(
            "张三", "李四", "张三", "王五"
          ))
          stream.map((_, 1)).keyBy(0).sum(1).print()
          streamEnv.execute()
        }
    }
    
  2. Java code

    package com.hjf.source;
    
    import org.apache.flink.api.common.functions.FlatMapFunction;
    import org.apache.flink.api.java.tuple.Tuple2;
    import org.apache.flink.streaming.api.datastream.DataStreamSource;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.util.Collector;
    import java.util.Arrays;
    public class DataSourceForCollection {
        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
            streamEnv.setParallelism(1);
            DataStreamSource<String> stream = streamEnv.fromCollection(Arrays.asList("张三", "李四", "张三", "王五"));
            stream.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                @Override
                public void flatMap(String s, Collector<Tuple2<String, Integer>> out) throws Exception {
                    String[] arr = s.split(" ");
                    for (String ele: arr) {
                        if (ele.length() > 0) {
                            out.collect(new Tuple2<String, Integer>(ele, 1));
                        }
                    }
                }
            }).keyBy(0).sum(1).print();
            streamEnv.execute("collect");
        }
    }
    
    
  3. Run result: (screenshot not included)

2. fromElements(T …)

Creates a data stream from the given sequence of objects. All objects must be of the same type.

  1. Scala code

    package com.hjf.dataSource
    
    import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
    object DataSourceForElement {
      def main(args: Array[String]): Unit = {
          val streamEnv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
          streamEnv.setParallelism(1)
          import org.apache.flink.streaming.api.scala._
          val stream: DataStream[String] = streamEnv.fromElements("hello", "word", "spark", "word")
          stream.map((_, 1)).keyBy(0).sum(1).print()
          streamEnv.execute()
        }
    }
    
  2. Java code

    package com.hjf.source;
    
    import org.apache.flink.api.common.functions.FlatMapFunction;
    import org.apache.flink.api.java.tuple.Tuple2;
    import org.apache.flink.streaming.api.datastream.DataStreamSource;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.util.Collector;
    
    public class DataSourceForElement {
        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
            streamEnv.setParallelism(1);
            DataStreamSource<String> stream = streamEnv.fromElements("张三", "李四", "张三", "王五");
            stream.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                @Override
                public void flatMap(String s, Collector<Tuple2<String, Integer>> out) throws Exception {
                    String[] arr = s.split(" ");
                    for (String ele: arr) {
                        if (ele.length() > 0) {
                            out.collect(new Tuple2<String, Integer>(ele, 1));
                        }
                    }
                }
            }).keyBy(0).sum(1).print();
            streamEnv.execute("collect");
        }
    }
    
    
  3. Run result: (screenshot not included)

II. File-based sources

1. Local files
  1. Scala code

    package com.hjf.dataSource
    
    import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
    object DataSourceForFile {
      def main(args: Array[String]): Unit = {
        val streamEnv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
        streamEnv.setParallelism(1)
        import org.apache.flink.streaming.api.scala._
        val inputPath: String = getClass.getResource("/data.txt").getPath
        // If a path is written here directly, it must be an absolute path
        val stream: DataStream[String] = streamEnv.readTextFile(inputPath)
        stream.flatMap(_.split(" ")).map((_, 1)).keyBy(0).sum(1).print()
        streamEnv.execute()
      }
    }
    
    
  2. Java code

    package com.hjf.source;
    
    import org.apache.flink.api.common.functions.FlatMapFunction;
    import org.apache.flink.api.java.tuple.Tuple2;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.util.Collector;
    
    public class DataSourceForFile {
        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
            streamEnv.readTextFile("data/words.txt")
                    .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                        @Override
                        public void flatMap(String s, Collector<Tuple2<String, Integer>> out) throws Exception {
                            String[] arr = s.split(" ");
                            for (String ele: arr) {
                                if (ele.length() > 0) {
                                    out.collect(new Tuple2<String, Integer>(ele, 1));
                                }
                            }
                        }
                    }).keyBy(0).sum(1).print();
            streamEnv.execute();
        }
    }
    
    
2. HDFS
  1. Scala code

    package com.hjf.dataSource
    
    import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
    object DataSourceForHDFS {
      def main(args: Array[String]): Unit = {
        val streamEnv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
        streamEnv.setParallelism(1)
        import org.apache.flink.streaming.api.scala._
        val stream: DataStream[String] = streamEnv.readTextFile("hdfs://node02:8020/hadoop/words.txt")
        stream.flatMap(_.split(" ")).map((_, 1)).keyBy(0).sum(1).print()
        streamEnv.execute()
      }
    }
    
  2. Java code

    package com.hjf.source;
    
    import org.apache.flink.api.common.functions.FlatMapFunction;
    import org.apache.flink.api.java.tuple.Tuple2;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.util.Collector;
    
    public class DataSourceForHDFS {
        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
            streamEnv.setParallelism(1);
            streamEnv.readTextFile("hdfs://node01:8020/hadoop/words.txt")
                    .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                        @Override
                        public void flatMap(String s, Collector<Tuple2<String, Integer>> out) throws Exception {
                            String[] arr = s.split(" ");
                            for (String ele: arr) {
                                if (ele.length() > 0) {
                                    out.collect(new Tuple2<String, Integer>(ele, 1));
                                }
                            }
                        }
                    }).keyBy(0).sum(1).print();
            streamEnv.execute();
        }
    }
    
    

III. Socket-based sources

socketTextStream(String hostname, int port) - reads from a socket. Elements can be split by a delimiter.
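
A minimal sketch of the delimiter variant mentioned above (the object name is made up; everything else mirrors the word-count example that follows). In the Scala API the delimiter is passed as a Char via socketTextStream(hostname, port, delimiter, maxRetry):

    package com.hjf.dataSource

    import org.apache.flink.streaming.api.scala._

    object DataSourceForSocketDelimiter {
      def main(args: Array[String]): Unit = {
        val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment
        streamEnv.setParallelism(1)
        // Same word count as below, but records are split on '|' instead of the default '\n'
        val stream: DataStream[String] = streamEnv.socketTextStream("node01", 8888, '|')
        stream.flatMap(_.split(" ")).map((_, 1)).keyBy(0).sum(1).print()
        streamEnv.execute()
      }
    }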

  1. Scala code:

    package com.hjf
    
    import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
    
    object StreamWordCount {
      def main(args: Array[String]): Unit = {
        val streamEnv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
        streamEnv.setParallelism(1)
        import org.apache.flink.streaming.api.scala._
        val stream: DataStream[String] = streamEnv.socketTextStream("node01", 8888)
        stream.flatMap(_.split(" ")).map((_, 1)).keyBy(0).sum(1).print()
        streamEnv.execute()
      }
    }
    
    
  2. Java code

    package com.hjf.source;
    
    import org.apache.flink.api.common.functions.FlatMapFunction;
    import org.apache.flink.api.java.tuple.Tuple2;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.util.Collector;
    
    public class DataSourceForSocket {
        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
            streamEnv.setParallelism(1);
            streamEnv.socketTextStream("node01", 8888)
                    .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                        @Override
                        public void flatMap(String s, Collector<Tuple2<String, Integer>> out) throws Exception {
                            String[] arr = s.split(" ");
                            for (String ele: arr) {
                                if (ele.length() > 0) {
                                    out.collect(new Tuple2<String, Integer>(ele, 1));
                                }
                            }
                        }
                    }).keyBy(0).sum(1).print();
            streamEnv.execute();
        }
    }
    
    

IV. Kafka-based sources

1. Reading plain String data from Kafka
  1. Scala code

    import java.util.Properties
    
    import org.apache.flink.api.common.serialization.SimpleStringSchema
    import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
    import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
    import org.apache.kafka.common.serialization.StringDeserializer
    
    /**
     * @author Jiang锋时刻
     * @create 2020-07-10 16:01
     */
    object DataSourceForKafkaByString {
      def main(args: Array[String]): Unit = {
        val streamEnv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
        streamEnv.setParallelism(1)
        import org.apache.flink.streaming.api.scala._
        val props: Properties = new Properties()
        props.setProperty("bootstrap.servers", "node01:9092,node02:9092,node03:9092")
        props.setProperty("group.id", "fink01")
        // Key and value deserializers
        props.setProperty("key.deserializer", classOf[StringDeserializer].getName)
        props.setProperty("value.deserializer", classOf[StringDeserializer].getName)
        // Where to start reading when there is no committed offset (latest = only new records)
        props.setProperty("auto.offset.reset", "latest")
        val stream: DataStream[String] = streamEnv.addSource(
          new FlinkKafkaConsumer[String]("test01", new SimpleStringSchema(), props))
        val result: DataStream[(String, Int)] = stream.flatMap(_.split(" ")).map((_, 1)).keyBy(0).sum(1)
        result.print()
        streamEnv.execute()
      }
    }
    
  2. Java code

    package com.hjf.source;
    
    import org.apache.flink.api.common.functions.FlatMapFunction;
    import org.apache.flink.api.common.serialization.SimpleStringSchema;
    import org.apache.flink.api.java.tuple.Tuple2;
    import org.apache.flink.streaming.api.TimeCharacteristic;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
    import org.apache.flink.util.Collector;
    
    import java.util.Properties;
    
    /**
     * @author Jiang锋时刻
     * @create 2020-07-17 16:23
     */
    public class DataSourceForKafkaByString {
        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
            streamEnv.setParallelism(1);
            
            Properties props = new Properties();
            props.setProperty("bootstrap.servers", "node01:9092,node02:9092,node03:9092");
            props.setProperty("group.id", "flink2");
            // Key and value deserializers
            props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
            props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
            
            FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<String>("test1", new SimpleStringSchema(),
                    props);
    
            // Start consuming from the latest records in the topic
            // (use consumer.setStartFromEarliest() to start from the beginning instead)
            consumer.setStartFromLatest();

            // Start from a given point in time; records before that timestamp are ignored
            // consumer.setStartFromTimestamp(123454532L);

            // Start from explicitly specified offsets; the per-partition offset map has to be
            // built by hand (see the sketch at the end of this section)
            // consumer.setStartFromSpecificOffsets(offsets);

            // Start from the offsets last committed by this consumer group, so group.id must be set
            // consumer.setStartFromGroupOffsets();
    
            streamEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
            streamEnv.addSource(consumer)
                    .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                        @Override
                        public void flatMap(String s, Collector<Tuple2<String, Integer>> out) throws Exception {
                            String[] arr = s.split(" ");
                            for (String ele: arr) {
                                if (ele.length() > 0) {
                                    out.collect(new Tuple2<String, Integer>(ele, 1));
                                }
                            }
                        }
                    }).keyBy(0).sum(1).print();
            streamEnv.execute();
        }
    }
    
    
  3. Start Kafka

    kafka-server-start.sh /opt/hjf/kafka/config/server.properties
    # a home-made startup script
    ./startkafka.sh
    
  4. Start a Kafka console producer

    kafka-console-producer.sh --broker-list node01:9092,node02:9092,node03:9092 --topic test01
    
  5. Run result: (screenshots of the producer input and the job console output not included)

2. Reading key-value data from Kafka
  1. Scala code:

    package com.hjf.dataSource
    
    import java.util.Properties
    import org.apache.flink.api.common.typeinfo.TypeInformation
    import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
    import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer, KafkaDeserializationSchema}
    import org.apache.kafka.clients.consumer.ConsumerRecord
    import org.apache.kafka.common.serialization.StringDeserializer
    
    import org.apache.flink.streaming.api.scala._
    
    object DataSourceForKafkaByKeyValue {
      def main(args: Array[String]): Unit = {
        val streamEnv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
        streamEnv.setParallelism(1)
        val props: Properties = new Properties()
        props.setProperty("bootstrap.servers", "node01:9092,node02:9092,node03:9092")
        props.setProperty("group.id", "flink02")
        // Key and value deserializers
        props.setProperty("key.deserializer", classOf[StringDeserializer].getName)
        props.setProperty("value.deserializer", classOf[StringDeserializer].getName)
        // Where to start reading when there is no committed offset
        props.setProperty("auto.offset.reset", "latest")
    
        val stream: DataStream[(String, String)] = streamEnv.addSource(
          new FlinkKafkaConsumer[(String, String)]("test03",
            new KafkaDeserializationSchema[(String, String)] {
              // Has the stream reached its end? (false: never stop)
              override def isEndOfStream(t: (String, String)): Boolean = false
    
              override def deserialize(consumerRecord: ConsumerRecord[Array[Byte], Array[Byte]]): (String, String) = {
                if (consumerRecord != null) {
                  var key = "null"
                  var value = "null"
                  if (consumerRecord.key() != null) {
                    key = new String(consumerRecord.key(), "UTF-8")
                  }
                  if (consumerRecord.value() != null) {
                    value = new String(consumerRecord.value(), "UTF-8")
                  }
                  (key, value)
                } else {
                  ("null", "null")
                }
              }
    
              override def getProducedType: TypeInformation[(String, String)] = {
                createTuple2TypeInformation(createTypeInformation[String], createTypeInformation[String])
              }
            }, props))
        stream.print()
        streamEnv.execute()
      }
    }
    
    
  2. Run result: (screenshots not included)
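
The setStartFromSpecificOffsets option commented out in the Java example above needs a manually built java.util.Map keyed by KafkaTopicPartition. A minimal Scala sketch under the same broker/topic assumptions as the examples above; the object name and the offset values (23 and 31) are made up for illustration:

    package com.hjf.dataSource

    import java.util.{HashMap => JHashMap, Properties}

    import org.apache.flink.api.common.serialization.SimpleStringSchema
    import org.apache.flink.streaming.api.scala._
    import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
    import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition

    object DataSourceForKafkaBySpecificOffsets {
      def main(args: Array[String]): Unit = {
        val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment
        streamEnv.setParallelism(1)

        val props = new Properties()
        props.setProperty("bootstrap.servers", "node01:9092,node02:9092,node03:9092")
        props.setProperty("group.id", "flink02")

        val consumer = new FlinkKafkaConsumer[String]("test01", new SimpleStringSchema(), props)

        // Made-up offsets, for illustration only: partition 0 resumes at offset 23, partition 1 at 31.
        // setStartFromSpecificOffsets expects a java.util.Map[KafkaTopicPartition, java.lang.Long].
        val specificOffsets = new JHashMap[KafkaTopicPartition, java.lang.Long]()
        specificOffsets.put(new KafkaTopicPartition("test01", 0), java.lang.Long.valueOf(23L))
        specificOffsets.put(new KafkaTopicPartition("test01", 1), java.lang.Long.valueOf(31L))
        consumer.setStartFromSpecificOffsets(specificOffsets)

        streamEnv.addSource(consumer).print()
        streamEnv.execute()
      }
    }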

V. Custom sources

A source without parallelism (i.e. a parallelism of exactly 1) is defined by implementing the SourceFunction interface.
A parallel source is defined by implementing the ParallelSourceFunction interface or by extending RichParallelSourceFunction (see the sketch after the examples below).

  1. Scala code

    package com.hjf.dataSource
    
    import org.apache.flink.streaming.api.functions.source.SourceFunction
    import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext
    import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
    
    import scala.util.Random
    
    object DataSourceForCustomer {
      def main(args: Array[String]): Unit = {
        val streamEnv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
        streamEnv.setParallelism(1)
        import org.apache.flink.streaming.api.scala._
    
        val stream: DataStream[StationLog] = streamEnv.addSource(new CustomerSource)
        stream.print()
        streamEnv.execute()
      }
    }
    
    
    case class StationLog(sid:String, callOut:String, callIn:String, callType:String, callTime:Long, duration:Long)
    
    // Custom source
    class CustomerSource extends SourceFunction[StationLog] {
      // Flag that controls whether the stream keeps running
      var flag = true
    
      /**
       * Start the source.
       * In most cases the run method contains a loop, so that data is produced continuously.
       * @param sourceContext
       */
      override def run(sourceContext: SourceContext[StationLog]): Unit = {
        val random: Random = new Random()
        val types: Array[String] = Array("fail", "busy", "barring", "success")
    
        while(flag) {
          1.to(10).map(one => {
            var callOut = "1860000%04d".format(random.nextInt(10000))
            var callIn = "1770000%04d".format(random.nextInt(10000))
            StationLog("station_" + random.nextInt(10), callOut, callIn,
              types(random.nextInt(4)), System.currentTimeMillis(), 0)
          }).foreach(sourceContext.collect(_))  // emit the records
    
          Thread.sleep(2000)
        }
      }
      
      // Stop the data stream
      override def cancel(): Unit = {
        flag = false
      }
    }
    
  2. Java code

    package com.hjf.source.mysql;
    
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.streaming.api.functions.source.SourceFunction;
    
    import java.util.Random;
    
    public class DataSourceForConsumer {
        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
            streamEnv.setParallelism(1);
            streamEnv.addSource(new ConsumerSource()).print();
            streamEnv.execute();
        }
    }
    
    class StationLog{
        private String sid;
        private String callOut;
        private String callIn;
        private String callType;
        private Long callTime;
        private Long duration;
    
        public StationLog(String sid, String callOut, String callIn, String callType, Long callTime, Long duration) {
            this.sid = sid;
            this.callOut = callOut;
            this.callIn = callIn;
            this.callType = callType;
            this.callTime = callTime;
            this.duration = duration;
        }
    
        @Override
        public String toString() {
            return "StationLog(" +
                    "sid='" + sid + '\'' +
                    ", callOut='" + callOut + '\'' +
                    ", callIn='" + callIn + '\'' +
                    ", callType='" + callType + '\'' +
                    ", callTime=" + callTime +
                    ", duration=" + duration +
                    ')';
        }
    }
    
    // Custom source
    class ConsumerSource implements SourceFunction<StationLog> {
        boolean flag = true;
        @Override
        public void run(SourceContext<StationLog> out) throws Exception {
            Random random = new Random();
            String[] types = new  String[]{"fail", "busy", "barring", "success"};
    
            while (flag) {
                for (int i = 0; i < 10; i++) {
                    String callOut = String.format("1860000%04d", random.nextInt(10000));
                    String callIn = String.format("1770000%04d", random.nextInt(10000));
                    StationLog stationLog = new StationLog("station_" + random.nextInt(10), callOut, callIn,
                            types[random.nextInt(4)], System.currentTimeMillis(), 23L);
                    out.collect(stationLog);
                }
    
                Thread.sleep(2000L);
            }
        }
    
        @Override
        public void cancel() {
            flag = false;
        }
    }
    
  3. Run result: (screenshot not included)
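
The two examples above implement SourceFunction, so the source always runs with parallelism 1. A minimal sketch of a parallel source based on ParallelSourceFunction (class and object names are made up; each subtask simply emits its own counter once per second):

    package com.hjf.dataSource

    import org.apache.flink.streaming.api.functions.source.{ParallelSourceFunction, SourceFunction}
    import org.apache.flink.streaming.api.scala._

    // Each parallel subtask runs its own copy of run() and emits its own counter every second.
    class CounterParallelSource extends ParallelSourceFunction[Long] {
      @volatile private var running = true
      private var count = 0L

      override def run(ctx: SourceFunction.SourceContext[Long]): Unit = {
        while (running) {
          ctx.collect(count)
          count += 1
          Thread.sleep(1000)
        }
      }

      override def cancel(): Unit = running = false
    }

    object DataSourceForParallelCustomer {
      def main(args: Array[String]): Unit = {
        val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment
        // Unlike a plain SourceFunction, this source is allowed to run with parallelism > 1.
        streamEnv.setParallelism(2)
        streamEnv.addSource(new CounterParallelSource).print()
        streamEnv.execute()
      }
    }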

VI. MySQL-based sources

  1. Scala code

    package com.hjf.dataSource
    
    import org.apache.flink.configuration.Configuration
    import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
    import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
    import java.sql.{Connection, DriverManager, PreparedStatement, ResultSet}
    
    object DataSourceForMySQL {
      def main(args: Array[String]): Unit = {
        val streamEnv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
        import org.apache.flink.streaming.api.scala._
    
        streamEnv.addSource(new MySQLSource()).print()
        streamEnv.execute()
      }
    
      class MySQLSource() extends RichSourceFunction[Student] {
    
        var conn: Connection = _
        var ps: PreparedStatement = _
    
        // Connect to MySQL and prepare the query
        override def open(parameters: Configuration): Unit = {
    
          val driver = "com.mysql.jdbc.Driver"
          val url = "jdbc:mysql://node01:3306/test"
          val username = "root"
          val password = "123456"
          Class.forName(driver)
          conn = DriverManager.getConnection(url, username, password)
          val sql = "select * from student"
          ps = conn.prepareStatement(sql)
    
        }
    
        // Close the connection and release resources
        override def close(): Unit = {
          if (conn != null) {
            conn.close()
          }
          if (ps != null) {
            ps.close()
          }
        }
    
        override def run(out: SourceFunction.SourceContext[Student]): Unit = {
          val resultSet: ResultSet = ps.executeQuery()
          while (resultSet.next()) {
            val student: Student = new Student(
              resultSet.getInt("id"),
              resultSet.getString("name"),
              resultSet.getString("password"),
              resultSet.getInt("age")
            )
            out.collect(student)
          }
        }
        
        override def cancel(): Unit = {}
      }
    
      // Student case class
      case class Student(id: Int, name:String, password: String, age: Int)
    }
    
    
  2. Java code

    package com.hjf.source;
    
    import org.apache.flink.configuration.Configuration;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
    
    import java.sql.*;
    
    public class DataSourceForMysql {
        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
            streamEnv.addSource(new MySQLSource()).print();
            streamEnv.execute();
        }
    }
    
    class Student{
        private int id;
        private String name;
        private String password;
        private int age;
    
        public Student(int id, String name, String password, int age) {
            this.id = id;
            this.name = name;
            this.password = password;
            this.age = age;
        }
    
        @Override
        public String toString() {
            return "Student{" +
                    "id=" + id +
                    ", name='" + name + '\'' +
                    ", password='" + password + '\'' +
                    ", age=" + age +
                    '}';
        }
    }
    
    // Custom MySQL source
    class MySQLSource extends RichSourceFunction<Student> {
        private Connection conn;
        private PreparedStatement ps;
    
        @Override
        public void open(Configuration parameters) throws Exception {
            conn = getConnection();
            String sql = "select * from student;";
    
            ps = conn.prepareStatement(sql);
        }
    
        private Connection getConnection() throws ClassNotFoundException, SQLException {
            Connection connection = null;
            Class.forName("com.mysql.jdbc.Driver");
            connection = DriverManager.getConnection("jdbc:mysql://node01:3306/test", "root", "123456");
    
            return connection;
        }
    
        @Override
        public void close() throws Exception {
            if (conn != null) {
                conn.close();
            }
            if (ps != null) {
                ps.close();
            }
        }
    
        @Override
        public void run(SourceContext<Student> out) throws Exception {
            ResultSet resultSet = ps.executeQuery();
    
            while (resultSet.next()) {
                Student student = new Student(
                        resultSet.getInt("id"),
                        resultSet.getString("name"),
                        resultSet.getString("password"),
                        resultSet.getInt("age")
                );
                out.collect(student);
            }
        }
    
        @Override
        public void cancel() {
        }
    }
    

VII. Characteristics of the different sources

  1. Collection-based: bounded data sets, mainly useful for local testing.
  2. File-based: suited to watching a file for changes and reading its content (see the sketch after this list).
  3. Socket-based: listens on a host and port and reads data from the socket.
  4. Custom addSource: in most real scenarios the data is unbounded and keeps arriving, for example when consuming a Kafka topic; this is where addSource is needed.
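
A minimal sketch of the "watch a file and read its content" case from item 2, using readFile with FileProcessingMode.PROCESS_CONTINUOUSLY (the path reuses data/words.txt from the earlier local-file example; the 1000 ms scan interval is an arbitrary choice):

    package com.hjf.dataSource

    import org.apache.flink.api.java.io.TextInputFormat
    import org.apache.flink.core.fs.Path
    import org.apache.flink.streaming.api.functions.source.FileProcessingMode
    import org.apache.flink.streaming.api.scala._

    object DataSourceForContinuousFile {
      def main(args: Array[String]): Unit = {
        val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment
        streamEnv.setParallelism(1)
        val path = "data/words.txt"
        // Unlike readTextFile, PROCESS_CONTINUOUSLY keeps scanning the path every 1000 ms
        // and re-reads the file whenever its modification time changes.
        val stream: DataStream[String] = streamEnv.readFile(
          new TextInputFormat(new Path(path)), path, FileProcessingMode.PROCESS_CONTINUOUSLY, 1000L)
        stream.flatMap(_.split(" ")).map((_, 1)).keyBy(0).sum(1).print()
        streamEnv.execute()
      }
    }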

Acknowledgements:

  1. This article draws on the courseware of the 尚学堂 Flink course.
  2. This article references the blog post "Flink 从 0 到 1 学习 —— Data Source 介绍".
  3. This article references the blog post "Flink消费Kafka数据时指定offset的五种方式".