Flink Sink Hbase

本文介绍如何利用Flink实现数据实时流转,详细讲解了从Flink主类配置,到反序列化过程,以及如何通过BeanUtil、HTableBase接口、UserHTable、HbaseBaseMap、HtableRow和HbaseSink等组件,最终将数据成功写入HBase存储系统。
摘要由CSDN通过智能技术生成

Flink将数据落地Hbase

Flink主类

package flink.sink2hbase;

import flink.sink2hbase.deserialization.JsonDeserializationSchema;
import flink.sink.HbaseSinkFunction;
import flink.sink2hbase.map.HTableBaseMap;
import flink.sink2hbase.table.UserHTable;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import flink.sink2hbase.pojo.User;
import util.BeanUtil;
import util.Property;

import java.util.Properties;

public class FlinkSinkHbase {
   

    private static OutputTag<UserHTable> userOutputTag = new OutputTag<>("用户表", TypeInformation.of(UserHTable.class));

    public static void main(String[] args) throws Exception {
   

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        Properties prop = Property.getKafkaProperties();
        prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"g1");
        
        // 将从kafka中读取过来的Json串反序列化成User对象
        FlinkKafkaConsumer<User> consumer = new FlinkKafkaConsumer<>("test",new JsonDeserializationSchema<>(User.class),prop);

        DataStreamSource<User> mainStream = env.addSource(consumer);

        SingleOutputStreamOperator<User> dataStream = mainStream
            .process(new ProcessFunction<User, User>() {
   
            
  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
可以通过以下代码实现FlinkHBaseSink: ```java import org.apache.flink.api.common.functions.MapFunction; import org.apache.flink.api.common.serialization.SimpleStringSchema; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer; import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema; import org.apache.flink.streaming.connectors.kafka.KafkaSink; import org.apache.flink.streaming.connectors.kafka.KafkaTopicPartition; import org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper; import org.apache.flink.streaming.util.serialization.KeyedSerializationSchema; import org.apache.flink.streaming.util.serialization.KeyedSerializationSchemaWrapper; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.util.Bytes; import org.apache.flink.streaming.connectors.hadoop.FsStateBackend; import org.apache.flink.streaming.connectors.hadoop.HadoopFileSystem; import org.apache.flink.streaming.connectors.hadoop.HadoopOutputFormat; import org.apache.flink.streaming.connectors.hadoop.HadoopSink; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputFormat; import org.apache.hadoop.mapred.lib.NullOutputFormat; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.kafka.clients.producer.ProducerRecord; import org.apache.kafka.common.serialization.StringSerializer; import java.io.IOException; import java.util.Properties; public class FlinkHBaseSink { public static void main(String[] args) throws Exception { // set up the execution environment final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // set up the HBase configuration org.apache.hadoop.conf.Configuration config = HBaseConfiguration.create(); config.set("hbase.zookeeper.quorum", "localhost"); config.set("hbase.zookeeper.property.clientPort", "2181"); config.set("hbase.master", "localhost:60000"); // create a connection to the HBase database Connection connection = ConnectionFactory.createConnection(config); // create a table instance Table table = connection.getTable(TableName.valueOf("mytable")); // create a stream from Kafka Properties properties = new Properties(); properties.setProperty("bootstrap.servers", "localhost:9092"); properties.setProperty("group.id", "test"); FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>("mytopic", new SimpleStringSchema(), properties); DataStream<String> stream = env.addSource(consumer); // map the stream to a HBase Put object DataStream<Put> hbaseStream = stream.map(new MapFunction<String, Put>() { @Override public Put map(String value) throws Exception { String[] parts = value.split(","); Put put = new Put(Bytes.toBytes(parts[])); put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("col1"), Bytes.toBytes(parts[1])); put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("col2"), Bytes.toBytes(parts[2])); return put; } }); // write the stream to HBase hbaseStream.addSink(new HBaseSinkFunction(table)); // execute program env.execute("Flink HBase Sink"); } public static class HBaseSinkFunction extends org.apache.flink.streaming.api.functions.sink.RichSinkFunction<Put> { private Table table; public HBaseSinkFunction(Table table) { this.table = table; } @Override public void invoke(Put value, Context context) throws Exception { table.put(value); } @Override public void close() throws IOException { table.close(); } } } ```
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值