1. KafkaSink
To use KafkaSink, first add the dependencies:
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.12</artifactId>
    <version>1.13.1</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.75</version>
</dependency>
The key step is creating the producer that the Flink program uses:
FlinkKafkaProducer<String> kafkaProducer = new FlinkKafkaProducer<>("hadoop102:9092", "first", new SimpleStringSchema());
The String type parameter is the type of the data you write to Kafka.
package net.cyan.Sink;

import com.alibaba.fastjson.JSON;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

public class Demo1_kafkaSink {
    public static void main(String[] args) {
        // Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Create the FlinkKafkaProducer
        FlinkKafkaProducer<String> kafkaProducer =
                new FlinkKafkaProducer<>("hadoop102:9092", "first", new SimpleStringSchema());
        env.socketTextStream("hadoop103", 9999)
                // Transform each input line with map
                .map(new MapFunction<String, String>() {
                    @Override
                    public String map(String s) throws Exception {
                        // Split the input line
                        String[] split = s.split(",");
                        // Convert to a JSON string
                        return JSON.toJSONString(split[0].concat(split[1]).concat(split[2]));
                    }
                })
                // Attach the sink
                .addSink(kafkaProducer);
        try {
            // Launch the job
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
When the data you write to Kafka is of type String, this works as-is. But when you wrap your records in a custom POJO (i.e., a Bean), the default SimpleStringSchema in the producer above can no longer serialize them, so you have to use the overloaded constructor that takes a custom serialization schema, as sketched below.
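A minimal sketch of that overload, assuming the WaterSensor POJO used in the later examples; the topic name, producer Properties, and delivery semantic here are illustrative assumptions, not taken from the original (it needs imports for java.util.Properties, org.apache.kafka.clients.producer.ProducerRecord, and org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema):
Properties props = new Properties();
props.setProperty("bootstrap.servers", "hadoop102:9092");

FlinkKafkaProducer<WaterSensor> pojoProducer = new FlinkKafkaProducer<>(
        "first", // default topic (assumed name)
        new KafkaSerializationSchema<WaterSensor>() {
            @Override
            public ProducerRecord<byte[], byte[]> serialize(WaterSensor element, Long timestamp) {
                // Serialize the POJO to JSON bytes with fastjson
                return new ProducerRecord<>("first", JSON.toJSONBytes(element));
            }
        },
        props,
        FlinkKafkaProducer.Semantic.AT_LEAST_ONCE); // delivery guarantee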
2. Custom MySqlSink
Define a custom Sink that writes data to MySQL.
Step 1: import the dependency
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.49</version>
</dependency>
Step 2: define the custom Sink class
/*
 Only the RichXXXFunction variants have open() and close() methods.
 To write to MySQL, a Task needs at least one Connection.
 open():  called once when the Task starts; create the connection here
 close(): called once when the Task ends; release the connection here
 MySink extends RichSinkFunction, which in turn extends AbstractRichFunction
 and implements SinkFunction<IN>
*/
private static class MySink extends RichSinkFunction<WaterSensor> {
    private Connection connection;

    @Override
    // Called once when the Task is created
    public void open(Configuration parameters) throws Exception {
        connection = DriverManager.getConnection(
                "jdbc:mysql://hadoop102:3306/uba_db?useUnicode=true&characterEncoding=UTF-8",
                "root",
                "123321");
    }

    @Override
    // Called once when the Task shuts down
    public void close() throws Exception {
        // Guard against a failed open()
        if (connection != null) {
            connection.close();
        }
    }

    @Override
    // Called once per record
    public void invoke(WaterSensor value, Context context) throws Exception {
        // Idempotent write: REPLACE INTO overwrites rows with the same key
        String sql = "replace into tableName values(?,?,?)";
        // Precompile the statement
        PreparedStatement preparedStatement = connection.prepareStatement(sql);
        // Fill the placeholders: the first argument is the placeholder position,
        // the second is the value bound to it
        preparedStatement.setString(1, value.getId());
        preparedStatement.setLong(2, value.getTs());
        preparedStatement.setInt(3, value.getVc());
        // Execute the write and release the statement
        preparedStatement.execute();
        preparedStatement.close();
    }
}
Step 3: just attach it with addSink
// Create the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.socketTextStream("hadoop103", 9999)
        .map(new MapFunction<String, WaterSensor>() {
            @Override
            public WaterSensor map(String s) throws Exception {
                String[] split = s.split(",");
                return new WaterSensor(split[0], Long.valueOf(split[1]), Integer.valueOf(split[2]));
            }
        })
        .addSink(new MySink());
try {
    // Launch the job
    env.execute();
} catch (Exception e) {
    e.printStackTrace();
}
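The examples in this post rely on a WaterSensor POJO (package net.cyan.POJO) that is never shown; here is a minimal sketch of the shape the code assumes, with field names and types inferred from the constructor and getter calls used above and below (the real class may differ, e.g. it may use Lombok):
package net.cyan.POJO;

// Minimal sketch inferred from usage; not the original class
public class WaterSensor {
    private String id;   // sensor id
    private Long ts;     // event timestamp
    private Integer vc;  // water level reading

    public WaterSensor() { }

    public WaterSensor(String id, Long ts, Integer vc) {
        this.id = id;
        this.ts = ts;
        this.vc = vc;
    }

    public String getId() { return id; }
    public Long getTs() { return ts; }
    public Integer getVc() { return vc; }

    public void setId(String id) { this.id = id; }
    public void setTs(Long ts) { this.ts = ts; }
    public void setVc(Integer vc) { this.vc = vc; }
}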
3. RedisSink
The key parts of RedisSink are creating the FlinkJedisPoolConfig connection pool and importing the dependency; the connector has only this one release and is no longer updated:
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-redis -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-redis_2.11</artifactId>
    <version>1.1.5</version>
</dependency>
// Create a FlinkJedisPoolConfig connection pool
FlinkJedisPoolConfig jedis = new FlinkJedisPoolConfig.Builder()
        .setHost("hadoop102")  // host name
        .setPort(6379)         // port
        .setDatabase(0)        // database id
        .setMaxTotal(20)       // maximum number of connections
        .setMaxIdle(10)        // maximum number of idle connections
        .setMinIdle(5)         // minimum number of idle connections
        .setTimeout(60000)     // timeout in milliseconds (60 s)
        .build();
Then implement a RedisMapper, wrap it in a RedisSink, and attach it with addSink:
package net.cyan.Sink;

import com.alibaba.fastjson.JSON;
import net.cyan.POJO.WaterSensor;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;

/*
 Custom sink that writes data to Redis
*/
public class Demo3_RedisSink {
    public static void main(String[] args) {
        // Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Create a FlinkJedisPoolConfig connection pool
        FlinkJedisPoolConfig jedis = new FlinkJedisPoolConfig.Builder()
                .setHost("hadoop102")  // host name
                .setPort(6379)         // port
                .setDatabase(0)        // database id
                .setMaxTotal(20)       // maximum number of connections
                .setMaxIdle(10)        // maximum number of idle connections
                .setMinIdle(5)         // minimum number of idle connections
                .setTimeout(60000)     // timeout in milliseconds (60 s)
                .build();
        env.socketTextStream("hadoop103", 9999)
                .map(new MapFunction<String, WaterSensor>() {
                    @Override
                    public WaterSensor map(String s) throws Exception {
                        String[] split = s.split(",");
                        return new WaterSensor(split[0], Long.valueOf(split[1]), Integer.valueOf(split[2]));
                    }
                })
                // Attach the Redis sink
                .addSink(new RedisSink<WaterSensor>(jedis, new MyStringMapping()));
        try {
            // Launch the job
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    private static class MyStringMapping implements RedisMapper<WaterSensor> {
        @Override
        // Describes the Redis command used for writing
        public RedisCommandDescription getCommandDescription() {
            // Single-argument constructor
            return new RedisCommandDescription(RedisCommand.SET);
        }

        @Override
        // Which part of the record to use as the key
        public String getKeyFromData(WaterSensor waterSensor) {
            return waterSensor.getId();
        }

        @Override
        // Which part of the record to use as the value
        public String getValueFromData(WaterSensor waterSensor) {
            return JSON.toJSONString(waterSensor);
        }
    }
}
List and Set work essentially the same way as String: have the mapper return the corresponding type and switch the Redis command to the one for that type; see the sketch below.
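For example, a minimal sketch of a mapper that adds each record to a Redis set with SADD (the key and value choices mirror the String example above and are assumptions, not from the original):
private static class MySetMapping implements RedisMapper<WaterSensor> {
    @Override
    public RedisCommandDescription getCommandDescription() {
        // SADD adds the value to the set stored at the key
        return new RedisCommandDescription(RedisCommand.SADD);
    }

    @Override
    public String getKeyFromData(WaterSensor waterSensor) {
        // The set's key; here the sensor id
        return waterSensor.getId();
    }

    @Override
    public String getValueFromData(WaterSensor waterSensor) {
        // The member added to the set
        return JSON.toJSONString(waterSensor);
    }
}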
4. JDBCSink (using ClickHouse as an example)
ClickHouse DDL:
create table if not exists watersensor(
    id String,
    ts UInt64,
    vc UInt32
) engine = ReplacingMergeTree(ts)
order by (id, ts)
Likewise, import the dependencies first; two are needed, one for JDBC and one for ClickHouse:
<dependency>
    <groupId>ru.yandex.clickhouse</groupId>
    <artifactId>clickhouse-jdbc</artifactId>
    <version>0.1.55</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-jdbc_2.11</artifactId>
    <version>1.13.6</version>
</dependency>
Then construct a JDBCSink through JdbcSink.sink; the three-argument overload simply delegates to the four-argument one, filling in default execution options:
public static <T> SinkFunction<T> sink(
        String sql,
        JdbcStatementBuilder<T> statementBuilder,
        JdbcConnectionOptions connectionOptions) {
    return sink(sql, statementBuilder, JdbcExecutionOptions.defaults(), connectionOptions);
}
The full code is as follows:
package net.cyan.Sink;

import net.cyan.POJO.WaterSensor;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.connector.jdbc.*;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;

import java.sql.PreparedStatement;
import java.sql.SQLException;

public class Demo5_JDBCSink {
    public static void main(String[] args) {
        // Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Build the parameters that JdbcSink.sink needs
        JdbcStatementBuilder<WaterSensor> statementBuilder = new JdbcStatementBuilder<WaterSensor>() {
            @Override
            // Called once per WaterSensor in the stream
            public void accept(PreparedStatement preparedStatement, WaterSensor waterSensor) throws SQLException {
                // Fill the placeholders
                preparedStatement.setString(1, waterSensor.getId());
                preparedStatement.setLong(2, waterSensor.getTs());
                preparedStatement.setInt(3, waterSensor.getVc());
            }
        };
        JdbcExecutionOptions executionOptions = JdbcExecutionOptions.builder()
                .withBatchSize(200)        // how many records per batch
                .withMaxRetries(3)         // max retries on failure; the write should be idempotent
                .withBatchIntervalMs(2000) // flush interval for partially filled batches
                .build();
        JdbcConnectionOptions connectionOptions = new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                .withUrl("jdbc:clickhouse://hadoop102:8123/upp220608") // ClickHouse address
                .build();
        // Assemble the sink from the parameters
        SinkFunction<WaterSensor> sink = JdbcSink.sink(
                "insert into watersensor values(?,?,?)",
                statementBuilder,
                executionOptions,
                connectionOptions
        );
        env.socketTextStream("hadoop103", 9999)
                .map(new MapFunction<String, WaterSensor>() {
                    @Override
                    public WaterSensor map(String s) throws Exception {
                        String[] split = s.split(",");
                        return new WaterSensor(split[0], Long.valueOf(split[1]), Integer.valueOf(split[2]));
                    }
                })
                .addSink(sink);
        try {
            // Launch the job
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Once the job is running, just send data from the virtual machine, e.g. start nc -lk 9999 on hadoop103 and type lines such as sensor_1,1000,10.