1 从集合读取数据
/**
 * Sample case class for a water-level sensor; it carries the sensor's
 * "empty height" (headroom) readings.
 *
 * @param id sensor identifier
 * @param ts timestamp of the reading (unit not stated here — TODO confirm seconds vs. milliseconds)
 * @param vc empty-height (headroom) value reported by the sensor
 */
case class WaterSensor(
  id: String,
  ts: Long,
  vc: Double
)
/** Demo job: turns an in-memory list of WaterSensor readings into a stream and prints it. */
object Source_Collection {

  /**
   * Entry point. Builds the stream from a fixed list of three readings,
   * attaches a print sink and runs the job under the name "sensor".
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Fixed sample readings that serve as the job's input.
    val readings: List[WaterSensor] = List(
      WaterSensor("ws_001", 1577844001, 45.0),
      WaterSensor("ws_002", 1577844015, 43.0),
      WaterSensor("ws_003", 1577844020, 42.0)
    )

    val sensorDS: DataStream[WaterSensor] = env.fromCollection(readings)
    sensorDS.print()

    env.execute("sensor")
  }
}
2 从文件读取数据
// File source: read "input/data.txt" as a stream of text lines and print each one.
val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

val fileDS: DataStream[String] = env.readTextFile("input/data.txt")
fileDS.print()

// Run the job under the name "sensor".
env.execute("sensor")
3 以kafka消息队列的数据作为来源
引入kafka连接器的依赖:
<!-- Flink connector for Kafka 0.11 (Scala 2.11 build).
     See https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka-0.11 -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
    <version>1.7.2</version>
</dependency>
代码实现:
// Kafka source: consume string records from the "sensor" topic and print them.
val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

// Configuration handed to the underlying Kafka consumer.
// key.deserializer / value.deserializer are deliberately not set: the Flink Kafka
// consumer always fetches raw bytes (it forces ByteArrayDeserializer and logs a
// warning for any other configured value) and decodes records with the
// DeserializationSchema passed to its constructor below.
val properties = new Properties()
properties.setProperty("bootstrap.servers", "hadoop1:9092")
properties.setProperty("group.id", "consumer-group")
// Fallback start position when the consumer group has no committed offset.
properties.setProperty("auto.offset.reset", "latest")

// SimpleStringSchema turns each record's value bytes into a String.
val kafkaDS: DataStream[String] = env.addSource(
  new FlinkKafkaConsumer011[String]("sensor", new SimpleStringSchema(), properties)
)
kafkaDS.print()

// Run the job under the name "sensor".
env.execute("sensor")
4 自定义Source
除了以上的source数据来源,我们还可以自定义source。调用如下:
// Custom source: build a stream from the user-defined MySensorSource and print every record.
val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

val mySensorDS: DataStream[WaterSensor] = env.addSource(new MySensorSource())
mySensorDS.print()

// Run the job under the name "sensor".
env.execute("sensor")
MySensorSource具体的代码实现如下:
/**
 * Custom source that keeps emitting randomly generated [[WaterSensor]] readings
 * until it is cancelled.
 *
 * Each record has an id from "sensor_0" to "sensor_2", the fixed timestamp
 * 1577844001 and a value between 40 and 44. The loop sleeps 100 ms after each record.
 */
class MySensorSource extends SourceFunction[WaterSensor] {

  // Loop guard: written by cancel(), read by the loop in run(). cancel() is invoked
  // from a different thread than run(), so the flag must be volatile for the write
  // to become visible to the emitting loop.
  @volatile var flg = true

  /**
   * Emits readings through `ctx` until [[cancel]] clears the flag.
   *
   * @param ctx source context used to collect the generated records
   */
  override def run(ctx: SourceFunction.SourceContext[WaterSensor]): Unit = {
    // One generator for the whole run instead of two fresh instances per record.
    val random = new Random()
    while (flg) {
      // Generate and emit one reading.
      ctx.collect(
        WaterSensor(
          s"sensor_${random.nextInt(3)}",
          1577844001,
          random.nextInt(5) + 40
        )
      )
      Thread.sleep(100)
    }
  }

  /** Asks the loop in [[run]] to stop after its current iteration. */
  override def cancel(): Unit = {
    flg = false
  }
}