一、创建执行环境Environment
1,getExecutionEnvironment
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
//此方法做了封装,在本地执行环境下会返回本地的执行环境变量,在集群环境会返回集群环境的变量,env默认并行度设置在flink-conf.yaml内;
LocalStreamEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(1); //本地环境变量,需设置并行度作为参数
StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("jobmanage-hostname", 6123, "../WordCount.jar"); //jobmanager的ip和端口号。
二、数据源Source
1,从集合读取数据
以传感器探测温度为例,先创建一个SensorReading类
package com.cys.apitest.beans;
//传感器温度读取数据的数据类型
/**
 * Data type for a single sensor temperature reading.
 *
 * <p>Mutable JavaBean: Flink's POJO serialization requires a public
 * no-arg-compatible bean shape (public getters/setters).
 */
public class SensorReading {
    private String id;          // sensor identifier, e.g. "sensor_1"
    private Long timestamp;     // epoch time of the reading
    private Double temperature; // measured temperature

    /**
     * Creates a reading.
     *
     * @param id          sensor identifier
     * @param timestamp   epoch time of the reading
     * @param temperature measured temperature
     */
    public SensorReading(String id, Long timestamp, Double temperature) {
        this.id = id;
        this.timestamp = timestamp;
        this.temperature = temperature;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public Long getTimestamp() {
        return timestamp;
    }

    public void setTimestamp(Long timestamp) {
        this.timestamp = timestamp;
    }

    public Double getTemperature() {
        return temperature;
    }

    public void setTemperature(Double temperature) {
        this.temperature = temperature;
    }

    @Override
    public String toString() {
        // Same textual form as the IDE-generated concatenation version.
        return String.format("SensorReading{id='%s', timestamp=%s, temperature=%s}",
                id, timestamp, temperature);
    }
}
package com.cys.apitest.source;
import com.cys.apitest.beans.SensorReading;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.util.Arrays;
/** Example: building DataStreams from an in-memory collection and from elements. */
public class SourceTest_Collection {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Source 1: a fixed list of sensor readings.
        DataStream<SensorReading> sensorStream = env.fromCollection(Arrays.asList(
                new SensorReading("s1", 1728493489L, 37.1),
                new SensorReading("s2", 1728493439L, 36.1),
                new SensorReading("s3", 1728493489L, 38.0)));

        // Source 2: individual elements given inline.
        DataStream<Integer> numberStream = env.fromElements(1, 2, 5, 67, 9);

        sensorStream.print("data");  // "data" tags this stream's console output
        numberStream.print("int");   // "int" tags this stream's console output

        // Submit the job to the (local or cluster) environment.
        env.execute("作业名称1");
    }
}
2,从文件读取数据
/** Example: reading a text file line by line as a DataStream. */
public class Source_File {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Single task so console output keeps the file's line order.
        env.setParallelism(1);

        // Each line of the file becomes one String element.
        DataStream<String> lines = env.readTextFile("filePath");
        lines.print();

        env.execute();
    }
}
3,从kafka读取数据
1)启动zk
./bin/zkServer.sh start
2)启动kafka,并创建topic
./bin/kafka-server-start.sh -daemon ./config/server.properties
./bin/kafka-console-producer.sh --broker-list localhost:9092 --topic topicname //控制台生产者
3)pom.xml添加flink-kafka连接器依赖
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.11_2.12</artifactId>
<version>1.10.1</version>
</dependency>
4)编写代码读取数据,并输出。
/** Example: consuming a Kafka topic as a DataStream via the Flink Kafka connector. */
public class Source_Kafka {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Consumer configuration for the Kafka client.
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers", "localhost:9092");
        kafkaProps.setProperty("group.id", "consumer-group");
        kafkaProps.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        kafkaProps.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        kafkaProps.setProperty("auto.offset.reset", "latest");

        // FlinkKafkaConsumer011 is a SourceFunction; attach it with addSource.
        DataStream<String> kafkaStream = env.addSource(
                new FlinkKafkaConsumer011<String>("topic1", new SimpleStringSchema(), kafkaProps));
        kafkaStream.print();

        env.execute();
    }
}
4,自定义source
自定义的source需要通过env.addSource添加SourceFunction的实现类。
package com.cys.apitest.source;
import com.cys.apitest.beans.SensorReading;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import java.util.HashMap;
import java.util.Random;
/** Example: a user-defined source attached via env.addSource(SourceFunction). */
public class Source_Selfdefinition {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<SensorReading> dataStream = env.addSource(new MySensorSource());
        dataStream.print();

        env.execute();
    }

    /**
     * Custom SourceFunction emitting a random temperature walk for 10 sensors,
     * one batch per second, until the job is cancelled.
     */
    public static class MySensorSource implements SourceFunction<SensorReading> {
        // Controls the run() loop. volatile because cancel() is invoked by
        // Flink from a different thread than the one executing run().
        private volatile boolean flag = true;

        @Override
        public void run(SourceContext<SensorReading> sourceContext) throws Exception {
            Random random = new Random();

            // Initial temperatures for 10 sensors: Gaussian around 60, stddev 20.
            HashMap<String, Double> sensorTempMap = new HashMap<String, Double>();
            for (int i = 0; i < 10; i++) {
                sensorTempMap.put("sensor_" + (i + 1), 60 + random.nextGaussian() * 20);
            }

            while (flag) {
                for (String sensorId : sensorTempMap.keySet()) {
                    // Random-walk step on the previous temperature.
                    Double newtemp = sensorTempMap.get(sensorId) + random.nextGaussian();
                    sensorTempMap.put(sensorId, newtemp);
                    sourceContext.collect(new SensorReading(sensorId, System.currentTimeMillis(), newtemp));
                }
                Thread.sleep(1000L); // emit one batch per second
            }
        }

        @Override
        public void cancel() {
            // Fix: was empty, so the loop in run() never terminated on job
            // cancellation. Setting the flag lets run() exit cleanly.
            flag = false;
        }
    }
}