2021.1.5课堂笔记(flink读取数据source源)

flink的相关依赖:

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.11</artifactId>
    <version>1.7.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.11</artifactId>
    <version>1.7.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.11</artifactId>
    <version>1.7.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-java</artifactId>
    <version>1.7.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_2.11</artifactId>
    <version>1.7.2</version>
</dependency>

source源:
从文件中读取数据:

import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}

object SourceFile {
  def main(args: Array[String]): Unit = {
    // Obtain the Flink streaming execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Build a stream whose elements are the lines of the given text file.
    val lines: DataStream[String] = env.readTextFile("input/data.txt")

    // Emit every line to stdout.
    lines.print()

    // Nothing runs until execute() is called; "sensor" is the job name.
    env.execute("sensor")
  }
}

从Kafka中读取数据:

import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.datastream.DataStreamSource
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.kafka.clients.consumer.ConsumerConfig

object SourceKafka {
  /**
   * Consumes string records from the Kafka topic "sensor" and prints them.
   */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Kafka consumer configuration. Use the ConsumerConfig constants for
    // every key (the original mixed a raw "bootstrap.servers" string with
    // the constants; the constant has the identical runtime value).
    val prop = new Properties()
    prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.237.100:9092")
    prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "flink-kafka-demo")
    // Shorter equivalent: classOf[StringDeserializer].getName
    prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
    prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
    // earliest: start from the beginning of the topic (re-reads old data)
    // latest:   only consume records produced after the job starts
    prop.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest")

    val kafkaDStream: DataStreamSource[String] = env.addSource(
      new FlinkKafkaConsumer[String](
        "sensor",
        // SimpleStringSchema deserializes only the record value.
        // To read both key and value, supply a KafkaDeserializationSchema.
        // (Console check: kafka-console-consumer.sh ... --property print.key=true)
        new SimpleStringSchema(),
        prop)
    )
    kafkaDStream.print()
    env.execute("kafkademo")
  }
}

从集合中读取数据:一般只用于测试

import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.scala._      //隐式函数的包,要手动导入

object SourceTest {
  def main(args: Array[String]): Unit = {
    // Set up the streaming environment. Parallelism defaults to the number
    // of available cores; override with env.setParallelism(n) if needed.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Build a bounded stream from an in-memory collection — testing only.
    val words = List("hello java", "hello flink", "hello flink")
    val stream1: DataStream[String] = env.fromCollection(words)

    stream1.print()
    env.execute("demo")
  }
}

自定义数据源:

import org.apache.flink.streaming.api.datastream.DataStreamSource
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.streaming.api.functions.source.SourceFunction

import scala.util.Random

object MySensorSource {
  def main(args: Array[String]): Unit = {
    // Streaming environment for the custom-source demo.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Attach the user-defined source that emits WaterSensor readings.
    val sensorStream: DataStreamSource[WaterSensor] = env.addSource(new MySensorSource())

    sensorStream.print()
    env.execute("mydefsource")
  }
}

case class WaterSensor(id:String,ts:Long,vc:Double)   // sensor id, event timestamp ts (ms — presumably; set from System.currentTimeMillis below), water level vc

class MySensorSource extends SourceFunction[WaterSensor]{

  // cancel() is invoked by the framework, possibly from a different thread
  // than the one executing run(). @volatile guarantees the loop observes
  // the update; without it the source might never stop.
  @volatile var flag = true

  // One shared RNG instead of allocating two fresh Random instances on
  // every loop iteration.
  private val random = new Random()

  /**
   * Emits one synthetic reading per second until cancelled:
   * id "sensor_0".."sensor_2", current wall-clock time, vc in [40, 44].
   */
  override def run(sourceContext: SourceFunction.SourceContext[WaterSensor]): Unit = {
    while (flag) {
      sourceContext.collect(
        WaterSensor(
          "sensor_" + random.nextInt(3),
          System.currentTimeMillis(),
          random.nextInt(5) + 40
        )
      )
      Thread.sleep(1000)
    }
  }

  // Flip the loop flag so run() exits after the current iteration.
  override def cancel(): Unit = {
    flag = false
  }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值