1. KafkaSink
First add the Kafka connector dependency.
<!--Kafka connector: 1.10.1 is the connector version (same as the Flink version), 0.11 is the Kafka version, 2.11 is the Scala version-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.11_2.11</artifactId>
<version>1.10.1</version>
</dependency>
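All of the examples below construct a com.atguigu.Fbeans.SensorReading bean that these notes never show. A minimal sketch of what it must look like, inferred from the constructor and getter calls in the code (field and getter names are assumptions):

package com.atguigu.Fbeans;
// Minimal sketch of the SensorReading POJO assumed by the sink examples below;
// the notes do not include it, so fields and getters are inferred from the
// calls made on it: (String id, Long timestamp, Double temperature).
public class SensorReading {
    private String id;
    private Long timestamp;
    private Double temperature;

    public SensorReading() {
    }

    public SensorReading(String id, Long timestamp, Double temperature) {
        this.id = id;
        this.timestamp = timestamp;
        this.temperature = temperature;
    }

    public String getId() { return id; }
    public Long getTimestamp() { return timestamp; }
    public Double getTemperature() { return temperature; }

    // CSV form so SimpleStringSchema can ship the record to Kafka as-is
    @Override
    public String toString() {
        return id + "," + timestamp + "," + temperature;
    }
}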
package com.atguigu.Adatastream_api.sink;
import com.atguigu.Fbeans.SensorReading;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011;
/**
* Requires the kafka-connector dependency above; note that the 0.11 connector
* provides FlinkKafkaProducer011, which is the producer class used here.
* At runtime, start zkServer first, then the Kafka server, and finally a Kafka
* console consumer; the consumer reads the data this class sinks.
* Combining this class with the CKafkaSource class essentially gives you a
* real-time ETL job (a sketch follows this class if you want to try it).
* zkServer.sh start
* kafka-server-start.sh config/server.properties &
* kafka-console-consumer.sh --bootstrap-server Linux001:9092 \
* --consumer.config <config file> --from-beginning --topic t001   (this blocks)
*/
public class AKafkaSink {
public static void main(String[] args) throws Exception {
//create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
//read the data
DataStreamSource<String> inputStream = env.readTextFile("G:\\SoftwareInstall\\idea\\project\\UserBehaviorAnalysis\\BasicKnowledge\\src\\main\\resources\\sensor.txt");
DataStream<String> result = inputStream.map(line -> {
String[] splits = line.split(",");
//toString here so the record can be shipped with SimpleStringSchema
return new SensorReading(splits[0], Long.parseLong(splits[1]), Double.parseDouble(splits[2])).toString();
});
//Kafka sink parameters: broker list, topic, serialization schema
result.addSink(new FlinkKafkaProducer011<String>("localhost:9092", "sinkTest", new SimpleStringSchema()));
env.execute("KafkaSink test");
}
}
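As noted in the class comment, swapping the file source for a Kafka source turns this sink into a real-time ETL job. A minimal sketch of that idea; the topic names, broker address, and group id are illustrative assumptions:

package com.atguigu.Adatastream_api.sink;
import com.atguigu.Fbeans.SensorReading;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011;
import java.util.Properties;
// Sketch only: Kafka in -> transform -> Kafka out, i.e. a minimal real-time ETL.
// Topic names ("sensor", "sinkTest"), broker address, and group id are assumptions.
public class KafkaEtlSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092");
        props.setProperty("group.id", "etl-demo");
        //extract: raw CSV lines from the input topic
        DataStream<String> raw = env.addSource(
                new FlinkKafkaConsumer011<>("sensor", new SimpleStringSchema(), props));
        //transform: parse, then re-serialize via toString
        DataStream<String> cleaned = raw.map(line -> {
            String[] splits = line.split(",");
            return new SensorReading(splits[0], Long.parseLong(splits[1]),
                    Double.parseDouble(splits[2])).toString();
        });
        //load: write to the output topic
        cleaned.addSink(new FlinkKafkaProducer011<>(
                "localhost:9092", "sinkTest", new SimpleStringSchema()));
        env.execute("kafka etl sketch");
    }
}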
2. Redis Sink
Add the Redis connector dependency.
<!--Redis connector-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-redis_2.11</artifactId>
<version>1.1.5</version>
</dependency>
package com.atguigu.Adatastream_api.sink;
import com.atguigu.Fbeans.SensorReading;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;
/**
* Redis sink
* Start the Redis server with "redis-server" first, then open a client with "redis-cli".
* Before and after running this program, run "keys *" in the client to check whether
* the key written by this program (sensor) exists; if it does, the write succeeded.
*/
public class BRedisSink {
public static void main(String[] args) throws Exception {
//create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
//read the data
DataStreamSource<String> inputStream = env.readTextFile("G:\\SoftwareInstall\\idea\\project\\UserBehaviorAnalysis\\BasicKnowledge\\src\\main\\resources\\sensor.txt");
DataStream<SensorReading> result = inputStream.map(line -> {
String[] splits = line.split(",");
return new SensorReading(splits[0], Long.parseLong(splits[1]), Double.parseDouble(splits[2]));
});
/**
* Configure Redis in two steps:
* first set the connection info, then define how each record is written.
* If your Redis requires a password, set it here (see the sketch after this
* class); if it does not, leave the password unset or the connection will fail.
*/
FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder()
.setHost("localhost")
.setPort(6379)
.build();
result.addSink(new RedisSink<>(config, new RedisMapper<SensorReading>() {
//the Redis command to run: HSET sensor, where "sensor" is the key of the hash
@Override
public RedisCommandDescription getCommandDescription() {
return new RedisCommandDescription(RedisCommand.HSET,"sensor");
}
//the hash stores field-value pairs; this sets the field
@Override
public String getKeyFromData(SensorReading data) {
return data.getId();
}
//the hash stores field-value pairs; this sets the value
@Override
public String getValueFromData(SensorReading data) {
return data.getTemperature().toString();//Redis stores everything as strings
}
}));
//execute
env.execute("redis sink test!!");
}
}
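As the configuration comment above mentions, a password-protected Redis needs the password set on the builder. A minimal sketch of the adjusted config; the password value is a placeholder:

//Only needed when the Redis server requires AUTH; setting a password for a
//server that has none will make the connection fail. "yourPassword" is illustrative.
FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder()
        .setHost("localhost")
        .setPort(6379)
        .setPassword("yourPassword")
        .build();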
3. ES Sink
Add the Elasticsearch connector dependency first.
<!--Elasticsearch connector-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-elasticsearch6_2.11</artifactId>
<version>1.10.1</version>
</dependency>
package com.atguigu.Adatastream_api.sink;
import com.atguigu.Fbeans.SensorReading;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;
import java.util.ArrayList;
import java.util.HashMap;
/**
* Start ES first; if a new index (sensor) appears after this program runs,
* the write succeeded.
*/
public class CESSink {
public static void main(String[] args) throws Exception {
//create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
//read the data
DataStreamSource<String> inputStream = env.readTextFile("G:\\SoftwareInstall\\idea\\project\\UserBehaviorAnalysis\\BasicKnowledge\\src\\main\\resources\\sensor.txt");
DataStream<SensorReading> result = inputStream.map(line -> {
String[] splits = line.split(",");
return new SensorReading(splits[0], Long.parseLong(splits[1]), Double.parseDouble(splits[2]));
});
//ES cluster connection addresses
ArrayList<HttpHost> list = new ArrayList<>();
list.add(new HttpHost("localhost",9200));
//wrap each record and send it to ES
result.addSink(new ElasticsearchSink.Builder<SensorReading>(list, new ElasticsearchSinkFunction<SensorReading>() {
@Override
public void process(SensorReading sensor, RuntimeContext ctx, RequestIndexer index) {
//copy the record's fields into a map
HashMap<String, String> map = new HashMap<>();
map.put("id",sensor.getId());
map.put("time",sensor.getTimestamp().toString());
map.put("temp",sensor.getTemperature().toString());
//build the index request to send to ES
IndexRequest inr=Requests.indexRequest()
.index("sensor")
.type("readingdata") //note: the type option is deprecated in ES 7; this targets ES 6
.source(map); //the document body
//hand the request to the indexer
index.add(inr);
}
})
.build());
//execute
env.execute("ES sink test");
}
}
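One thing worth knowing when testing: ElasticsearchSink buffers actions and writes them in bulk, so a small test run may appear to write nothing. The builder exposes bulk-flush settings; a minimal sketch that flushes every single record (fine for testing, too chatty for production). Here esSinkFunction stands for the anonymous ElasticsearchSinkFunction defined in the class above:

ElasticsearchSink.Builder<SensorReading> esSinkBuilder =
        new ElasticsearchSink.Builder<>(list, esSinkFunction);
//flush after every element so test records show up in ES immediately
esSinkBuilder.setBulkFlushMaxActions(1);
result.addSink(esSinkBuilder.build());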
4. JDBC Sink
Add the MySQL driver dependency.
<!--MySQL JDBC driver needed by the custom Flink MySQL sink-->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.44</version>
</dependency>
package com.atguigu.Adatastream_api.sink;
import com.atguigu.Fbeans.SensorReading;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
/**
* Flink 1.10 does not yet ship a JDBC sink for the DataStream API, so we define
* our own. This class implements a custom MySQL sink; since there is no official
* Flink artifact, we rely on MySQL's own JDBC driver for connectivity.
*
* If the source here were swapped for one that continuously produces data, the
* processed results would land in MySQL, where an application developer could
* query and visualize them in real time: a small end-to-end real-time
* processing application.
*/
public class DJDBCSink {
public static void main(String[] args) throws Exception {
//create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
//read the data
DataStreamSource<String> inputStream = env.readTextFile("G:\\SoftwareInstall\\idea\\project\\UserBehaviorAnalysis\\BasicKnowledge\\src\\main\\resources\\sensor.txt");
DataStream<SensorReading> result = inputStream.map(line -> {
String[] splits = line.split(",");
return new SensorReading(splits[0], Long.parseLong(splits[1]), Double.parseDouble(splits[2]));
});
/**
* Write to MySQL: for each record, update temp by id.
* The update runs first; if it affects no rows, the id is not in the table yet,
* so we fall back to an insert. (A single-statement upsert alternative is
* sketched after this class.)
*/
result.addSink(new RichSinkFunction<SensorReading>() {
//declare the connection and prepared statements
Connection conn=null;
PreparedStatement insertPS=null;
PreparedStatement updatePS=null;
@Override
public void open(Configuration parameters) throws Exception {
conn= DriverManager.getConnection("jdbc:mysql://localhost:3306/d_student","root","123456");
insertPS=conn.prepareStatement("insert into t_sensor(id,temp) values(?,?)");
updatePS=conn.prepareStatement("update t_sensor set temp=? where id=?");
}
@Override
public void invoke(SensorReading value, Context context) throws Exception {
//try the update first
updatePS.setDouble(1,value.getTemperature());
updatePS.setString(2,value.getId());
updatePS.execute();
if(updatePS.getUpdateCount() == 0){//no rows updated, so the id is new: insert instead
insertPS.setString(1,value.getId());
insertPS.setDouble(2,value.getTemperature());
insertPS.execute();
}
}
@Override
public void close() throws Exception {
insertPS.close();
updatePS.close();
conn.close();
}
});
//execute
env.execute("custom JDBC MySQL sink");
}
}
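The update-then-insert pattern above costs two statements per new id. If t_sensor.id is a primary (or unique) key, which this sketch assumes, MySQL can do the same upsert in a single statement with INSERT ... ON DUPLICATE KEY UPDATE:

//Alternative to update-then-insert: one upsert statement.
//Assumes t_sensor.id is declared PRIMARY KEY or UNIQUE.
//in open():
PreparedStatement upsertPS = conn.prepareStatement(
        "insert into t_sensor(id, temp) values(?, ?) " +
        "on duplicate key update temp = values(temp)");
//in invoke():
upsertPS.setString(1, value.getId());
upsertPS.setDouble(2, value.getTemperature());
upsertPS.execute();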