02 Flink 之读取数据

Flink 之读取数据

1. 从集合中读取数据

import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.api.scala._

// Sample case class modeling one sensor reading:
// id = sensor identifier, timestamp = event time, temperature = measured value.
// Marked final: case classes should not be subclassed.
final case class SensorReading(id: String, timestamp: Long, temperature: Double)

/**
  * Reads a fixed in-memory collection of sensor readings as a bounded
  * stream and prints every element.
  */
object collection {
  def main(args: Array[String]): Unit = {
    // Set up the streaming execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Data to serve as the stream's source.
    val readings = List(
      SensorReading("sensor_1", 1547718199, 35.80018327300259),
      SensorReading("sensor_6", 1547718201, 15.402984393403084),
      SensorReading("sensor_7", 1547718202, 6.720945201171228),
      SensorReading("sensor_10", 1547718205, 38.101067604893444)
    )
    val stream: DataStream[SensorReading] = env.fromCollection(readings)

    // Print with two parallel printing subtasks, prefixed with "stream".
    stream.print("stream").setParallelism(2)

    // Trigger job execution.
    env.execute()
  }
}

2. 从文件中读取数据

import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}

/**
  * Reads sensor readings from a text file of CSV lines
  * ("id,timestamp,temperature") and prints them.
  *
  * The input path may be supplied as the first program argument; when no
  * argument is given, the original default local path is used, so existing
  * invocations behave exactly as before.
  */
object File {
  def main(args: Array[String]): Unit = {
    // Set up the streaming execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Allow overriding the hard-coded path from the command line (backward compatible).
    val inputPath = args.headOption.getOrElse(
      "/Users/zgl/Documents/IdeaProjects/FlinkTutorial/src/sensors.txt")
    // Parse each line "id,timestamp,temperature" into a SensorReading.
    val stream: DataStream[SensorReading] = env.readTextFile(inputPath)
      .map(line => {
        val fields = line.split(",")
        SensorReading(fields(0).trim, fields(1).trim.toLong, fields(2).trim.toDouble)
      })
    // Print with four parallel printing subtasks.
    stream.print("stream").setParallelism(4)
    // Trigger job execution.
    env.execute()
  }
}

3. 以 Kafka 消息队列中的数据作为来源

  1. 需要先给项目的 pom.xml 文件中,引入依赖:

    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
      <version>1.7.2</version>
    </dependency>
    
  2. 具体代码:

    import java.util.Properties
    import org.apache.flink.api.scala._
    import org.apache.flink.api.common.serialization.SimpleStringSchema
    import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
    import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011
    
    /**
      * Consumes string records from the Kafka topic "sensor" as a
      * streaming source and prints them.
      */
    object Kafka {
      def main(args: Array[String]): Unit = {

        // Kafka consumer configuration.
        val props = new Properties()
        props.setProperty("bootstrap.servers", "hadoop101:9092")
        props.setProperty("group.id", "consumer-group")
        props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
        props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
        props.setProperty("auto.offset.reset", "latest")

        // Set up the streaming execution environment.
        val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

        // Attach a Kafka consumer on topic "sensor" with a plain-string schema.
        val consumer = new FlinkKafkaConsumer011[String]("sensor", new SimpleStringSchema(), props)
        val stream: DataStream[String] = env.addSource(consumer)

        // Print with a single printing subtask, prefixed with "stream".
        stream.print("stream").setParallelism(1)

        // Trigger job execution.
        env.execute()
      }
    }
    

4. 自定义 Source

import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.api.scala._
import scala.util.Random

/**
  * Driver that wires the user-defined MySensorSource into a streaming
  * job and prints its output.
  */
object MySource {
  def main(args: Array[String]): Unit = {
    // Set up the streaming execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Register the custom source as the stream's input.
    val sensorStream: DataStream[SensorReading] = env.addSource(new MySensorSource)

    // Print with a single printing subtask.
    sensorStream.print().setParallelism(1)

    // Trigger job execution.
    env.execute()
  }
}

/**
  * Custom source that emits randomly drifting temperature readings for
  * ten sensors ("sensor_1".."sensor_10") every 100 ms until cancelled.
  *
  * Extends SourceFunction and overrides run()/cancel().
  */
class MySensorSource extends SourceFunction[SensorReading] {

  // Flag: whether the source is still running. cancel() is invoked by the
  // framework from a different thread than run(), so the flag must be
  // @volatile to guarantee the write is visible to the emit loop.
  @volatile var running = true

  override def cancel(): Unit = {
    running = false
  }

  override def run(sourceContext: SourceFunction.SourceContext[SensorReading]): Unit = {
    val random = new Random()

    // Initial baselines: each sensor starts near 65 with Gaussian spread.
    var curTemp = 1.to(10).map(
      i => ("sensor_" + i, 65 + random.nextGaussian() * 20)
    )

    while (running) {
      // Random-walk each temperature by a small Gaussian step.
      curTemp = curTemp.map(
        t => (t._1, t._2 + random.nextGaussian())
      )

      // Stamp all readings of this round with the same wall-clock time.
      val curTime: Long = System.currentTimeMillis()

      curTemp.foreach(
        t => sourceContext.collect(SensorReading(t._1, curTime, t._2))
      )

      // Throttle the emission rate to one round per 100 ms.
      Thread.sleep(100)
    }
  }
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值