1. Reading from Kafka
First, add the Kafka connector dependency:
<!-- Kafka connector: 1.10.1 is the connector version (same as the Flink version), 0.11 is the Kafka version, 2.11 is the Scala version -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.11_2.11</artifactId>
<version>1.10.1</version>
</dependency>
package com.atguigu.Adatastream_api.source;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import java.util.Properties;
/**
 * Requires the kafka-connector dependency above.
 * To run: start zkServer first, then the Kafka broker, and finally the Kafka console producer;
 * messages produced there are consumed by this job.
 * zkServer.sh start
 * kafka-server-start.sh config/server.properties &
 * kafka-console-producer.sh --broker-list Linux001:9092 --topic t001   (this command blocks, waiting for input)
 *
 * To read the Kafka source from the beginning, two conditions must both be met:
 * 1. The consumer group must be brand new, i.e. it has never consumed (no committed offsets).
 * 2. The consumption strategy auto.offset.reset must be set to earliest.
 */
public class CKafkaSource {
public static void main(String[] args) throws Exception {
// create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
/**
 * A Kafka record carries several fields; the payload we care about is in the value field,
 * while the key field is used to decide which partition the record is written to.
 */
Properties ps = new Properties();
ps.setProperty("bootstrap.servers", "localhost:9092");// broker address(es)
ps.setProperty("group.id", "consumer-group");// consumer group
ps.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");// key deserializer
ps.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");// value deserializer
ps.setProperty("auto.offset.reset", "latest");// consumption strategy
// The second constructor argument (SimpleStringSchema) already defines how records are deserialized,
// so the key/value deserializer properties above are not strictly required.
DataStream<String> result = env.addSource(new FlinkKafkaConsumer011<String>("sensor", new SimpleStringSchema(), ps));
// print and execute
result.print("kafka data:");
env.execute("consume kafka data");
}
}
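The class comment above lists the two conditions for reading a topic from the beginning. The following is a minimal sketch of both ways to force that (it reuses the "sensor" topic and SimpleStringSchema from the example; the group id "consumer-group-fresh" is a hypothetical, never-used group):
Properties ps = new Properties();
ps.setProperty("bootstrap.servers", "localhost:9092");
ps.setProperty("group.id", "consumer-group-fresh");// hypothetical group that has never committed offsets
ps.setProperty("auto.offset.reset", "earliest");// for a group without committed offsets, start from the earliest offset
FlinkKafkaConsumer011<String> consumer = new FlinkKafkaConsumer011<>("sensor", new SimpleStringSchema(), ps);
// Alternatively, tell the connector explicitly to start from the earliest offset,
// ignoring committed offsets and the auto.offset.reset setting:
consumer.setStartFromEarliest();
DataStream<String> fromBeginning = env.addSource(consumer);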
2. Reading from a Collection
package com.atguigu.Adatastream_api.source;
import com.atguigu.Fbeans.SensorReading;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.util.Arrays;
public class BReadFromCollection {
public static void main(String[] args) throws Exception {
// create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
/**
 * Read data from a collection
 */
DataStreamSource<SensorReading> inputStream1 = env.fromCollection(Arrays.asList(
new SensorReading("sensor_1", 1547718199L, 35.8),
new SensorReading("sensor_6", 1547718201L, 15.4),
new SensorReading("sensor_1", 1547718202L, 16.7),
new SensorReading("sensor_1", 1547718205L, 38.3)
));
DataStream<Integer> inputStream2 = env.fromElements(1, 2, 4, 67,189);
// print the results
inputStream1.print("inputStream1");
inputStream2.print("inputStream2");
env.execute("Read from Collection");
}
}
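The SensorReading type imported from com.atguigu.Fbeans is used by the collection and custom-source examples but is not shown in this section. A minimal sketch of what the examples assume (a plain Flink POJO with id, timestamp and temperature fields; the real class may differ):
public class SensorReading {
private String id;// sensor id, e.g. "sensor_1"
private Long timestamp;// event timestamp in milliseconds
private Double temperature;// temperature reading

public SensorReading() {}// no-arg constructor, required for Flink to treat this as a POJO

public SensorReading(String id, Long timestamp, Double temperature) {
this.id = id;
this.timestamp = timestamp;
this.temperature = temperature;
}

public String getId() { return id; }
public Long getTimestamp() { return timestamp; }
public Double getTemperature() { return temperature; }
public void setId(String id) { this.id = id; }
public void setTimestamp(Long timestamp) { this.timestamp = timestamp; }
public void setTemperature(Double temperature) { this.temperature = temperature; }

@Override
public String toString() {
return "SensorReading{id='" + id + "', timestamp=" + timestamp + ", temperature=" + temperature + "}";
}
}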
3. Reading from a Custom Source
package com.atguigu.Adatastream_api.source;
import com.atguigu.Fbeans.SensorReading;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
import java.util.HashMap;
import java.util.Random;
/**
 * User-defined data source
 */
public class DUserdefinedSource {
public static void main(String[] args) throws Exception {
// create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
/*
Read data from a user-defined source
*/
DataStreamSource<SensorReading> result = env.addSource(new RichParallelSourceFunction<SensorReading>() {
private boolean running = true;// flag that controls data generation
// generate the data
@Override
public void run(SourceContext<SensorReading> sct) throws Exception {
HashMap<String, Double> map = new HashMap<>();
Random random = new Random();
for (int i = 0; i < 10; i++) {// initialize base temperatures for 10 sensors
map.put("sensor_" + (i + 1), 60 + random.nextGaussian() * 20);
}
while (running) {// let the 10 sensor temperatures fluctuate and emit readings stamped with the current time as the event time
for (String sensor : map.keySet()) {
map.put(sensor, map.get(sensor) + random.nextGaussian() * 3);
sct.collect(new SensorReading(sensor, System.currentTimeMillis(), map.get(sensor)));
}
Thread.sleep(1000L);// sleep inside the loop so a new batch is emitted about once per second
}
}
// cancel data generation
@Override
public void cancel() {
running = false;
}
});
// transform the data (nothing to do in this example)
// print the data
result.print("custom source");
env.execute("custom source");
}
}
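All three examples set TimeCharacteristic.EventTime, but none of them assigns event-time timestamps or watermarks, which event-time operators (such as event-time windows) require. A minimal sketch of that step for the custom-source stream, assuming the getTimestamp() getter from the SensorReading sketch above (needs the imports org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor and org.apache.flink.streaming.api.windowing.time.Time):
DataStream<SensorReading> withTimestamps = result.assignTimestampsAndWatermarks(
new BoundedOutOfOrdernessTimestampExtractor<SensorReading>(Time.seconds(1)) {
@Override
public long extractTimestamp(SensorReading element) {
return element.getTimestamp();// the timestamp field is already in milliseconds
}
});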