flink的相关依赖:
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.11</artifactId>
<version>1.7.2</version>
</dependency>
source源:
从文件中读取数据:
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
object SourceFile {
  def main(args: Array[String]): Unit = {
    // Obtain the Flink streaming execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Build a DataStream by reading the text file line by line.
    val lines: DataStream[String] = env.readTextFile("input/data.txt")

    // Emit every record to stdout.
    lines.print()

    // Nothing runs until execute() triggers the job.
    env.execute("sensor")
  }
}
从Kafka中读取数据:
import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.datastream.DataStreamSource
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.kafka.clients.consumer.ConsumerConfig
object SourceKafka {
  /** Reads string records from the Kafka topic "sensor" and prints them. */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Kafka consumer configuration.
    val prop = new Properties()
    // Use the ConsumerConfig constant here for consistency with the keys below
    // (the original mixed the raw string "bootstrap.servers" with constants).
    prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.237.100:9092")
    prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "flink-kafka-demo")
    // Shorter alternative: classOf[StringDeserializer].getName
    prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
    prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
    // earliest: start from the beginning of the topic (re-reads old data)
    // latest:   consume only records that arrive after the job starts
    prop.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest")

    val kafkaDStream: DataStreamSource[String] = env.addSource(
      new FlinkKafkaConsumer[String](
        "sensor",
        // To also inspect keys from the CLI:
        // kafka-console-consumer.sh --zookeeper node01:2181 --topic flink-kafka --property print.key=true
        // SimpleStringSchema deserializes only the record value;
        // use a KafkaDeserializationSchema to read both key and value.
        new SimpleStringSchema(),
        prop)
    )

    kafkaDStream.print()
    env.execute("kafkademo")
  }
}
从集合中读取数据:一般只用于测试
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.scala._ //隐式函数的包,要手动导入
object SourceTest {
  def main(args: Array[String]): Unit = {
    // Set up the streaming environment; parallelism defaults to the number
    // of available cores unless overridden via env.setParallelism(n).
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Bootstrap a stream from an in-memory collection — testing only.
    val words = List("hello java", "hello flink", "hello flink")
    val stream1: DataStream[String] = env.fromCollection(words)

    stream1.print()
    env.execute("demo")
  }
}
自定义数据源:
import org.apache.flink.streaming.api.datastream.DataStreamSource
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.streaming.api.functions.source.SourceFunction
import scala.util.Random
object MySensorSource {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Attach the custom SourceFunction that emits synthetic sensor readings.
    val sensorStream: DataStreamSource[WaterSensor] = env.addSource(new MySensorSource())
    sensorStream.print()
    env.execute("mydefsource")
  }
}
case class WaterSensor(id:String,ts:Long,vc:Double) // sensor id, event timestamp (ms), water level
class MySensorSource extends SourceFunction[WaterSensor]{
  // cancel() is invoked from a different thread than run(), so the stop flag
  // must be volatile for the write to become visible to the emit loop.
  @volatile var flag = true
  // One reusable Random instead of allocating two fresh instances per loop pass.
  private val random = new Random()

  /** Emits one synthetic WaterSensor reading per second until cancelled. */
  override def run(sourceContext: SourceFunction.SourceContext[WaterSensor]): Unit = {
    while (flag) {
      sourceContext.collect(
        WaterSensor(
          "sensor_" + random.nextInt(3),   // one of sensor_0 / sensor_1 / sensor_2
          System.currentTimeMillis(),      // event timestamp in milliseconds
          random.nextInt(5) + 40           // water level in [40, 44]
        )
      )
      Thread.sleep(1000)
    }
  }

  /** Signals the run() loop to stop emitting. */
  override def cancel(): Unit = {
    flag = false
  }
}