Reading from Kafka and Writing to HBase in Java: A Worked Example

Preface: Design Approach

a. Reading and writing are separate concerns, so two interfaces are defined, one for each operation;
b. For reading from Kafka, the read interface takes two parameters: a Properties object and the topicName;
c. The write interface does not take an HBase Connection as a parameter; to keep things simple, the ConsumerRecords just read from Kafka are passed in directly and written straight into HBase;
d. The HBase Connection object is created by a separate class, and writes go through multiple threads (a thread pool is supplied when the connection is created, which avoids repeatedly starting and stopping threads in the JVM);
e. Since several different tables need to be written, each write target is implemented as its own class, so switching tables is just a matter of calling a different class.

TIP: It may help to read 5.1 NormalFactory first; the overall flow is clearest there.


Package Structure

The Java package structure is as follows:

[Figure: package structure]


1. pom.xml

  • The dependencies used are as follows:
<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka -->
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka_2.11</artifactId>
      <version>2.0.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients -->
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka-clients</artifactId>
      <version>2.0.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-client</artifactId>
      <version>1.2.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-common</artifactId>
      <version>1.2.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-server</artifactId>
      <version>1.2.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.6.0</version>
    </dependency>

2. Read and Write Interfaces

2.1 Read

package cn.wsj.services;

import java.io.IOException;
import java.util.Properties;

public interface Read {
    public void readKafka(Properties prop,String topicName) throws IOException;
}


2.2 Write

package cn.wsj.services;

import org.apache.kafka.clients.consumer.ConsumerRecords;

public interface Write {
    public void writeHBase(ConsumerRecords<String,String> records);
}


3. Reading from Kafka

3.1 KafkaUtils

  • KafkaUtils: returns a KafkaConsumer instance;
package cn.wsj.services.common;

import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Properties;

public class KafkaUtils {
    public static KafkaConsumer<String,String> getConsumer(Properties prop){
        return  new KafkaConsumer<String,String>(prop);
    }
}


3.2 KafkaReadImpl

  • KafkaReadImpl: a Write instance is passed in at construction time, so records read from Kafka can be written into HBase immediately.
package cn.wsj.services.kafkareadutil;

import cn.wsj.services.Read;
import cn.wsj.services.Write;
import cn.wsj.services.common.KafkaUtils;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;


import java.time.Duration;
import java.util.Arrays;
import java.util.Properties;

public class KafkaReadImpl implements Read {
    private Write write;
    public KafkaReadImpl(Write write){
        this.write = write;
    }

    @Override
    public void readKafka(Properties prop, String topicName) {
        KafkaConsumer<String, String> consumer = KafkaUtils.getConsumer(prop);
        consumer.subscribe(Arrays.asList(topicName));
        // keep polling; each polled batch is handed straight to the Write implementation
        while(true){
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
            write.writeHBase(records);
        }
    }
}
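
A note on delivery semantics: the consumer properties set later in NormalFactory leave enable.auto.commit at its default of true, so offsets are committed periodically whether or not the HBase write succeeded. Below is a minimal sketch (my own addition, not part of the original code) of a variant loop that commits only after writeHBase returns; it assumes the caller also sets prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false").

        // variant of the poll loop with manual offset commits
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
            if (!records.isEmpty()) {
                write.writeHBase(records);
                // commit synchronously only after the batch has been handed to HBase
                consumer.commitSync();
            }
        }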


4. Writing to HBase

4.1 HBaseConf

  • HBaseConf: responsible for creating the Configuration object;
package cn.wsj.services.conf;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class HBaseConf {
    public static Configuration getConf(){
        // HBaseConfiguration.create() also loads the hbase-default.xml/hbase-site.xml defaults
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum","sole");
        return config;
    }
}


4.2 HBaseUtils

  • HBaseUtils: responsible for instantiating the HBase Connection object, backed by a fixed thread pool;
package cn.wsj.services.common;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class HBaseUtils {
    public static Connection getCon(Configuration config) throws IOException {
        // a fixed pool of 10 threads is handed to the connection for its batch operations
        ExecutorService es = Executors.newFixedThreadPool(10);
        Connection con = ConnectionFactory.createConnection(config, es);
        return con;
    }
}
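
One thing HBaseUtils does not cover is shutdown: the Connection and its ExecutorService are never released. If the job is ever meant to stop cleanly, a variant along the following lines can be used. This is only a sketch of my own; the static field es and the method closeCon are not part of the original code.

package cn.wsj.services.common;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class HBaseUtils {
    // keep a reference to the pool so it can be shut down together with the connection
    private static ExecutorService es;

    public static Connection getCon(Configuration config) throws IOException {
        es = Executors.newFixedThreadPool(10);
        return ConnectionFactory.createConnection(config, es);
    }

    // hypothetical helper: call once the consume/write loop has finished
    public static void closeCon(Connection con) throws IOException {
        if (con != null && !con.isClosed()) {
            con.close();
        }
        if (es != null) {
            es.shutdown();
        }
    }
}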


4.3 UsersImpl

  • UsersImpl: selects the target HBase table, assembles the incoming ConsumerRecords into a List<Put>, and writes them to the table;
  • each Put is constructed with the row key of that record (as a byte[]);
package cn.wsj.services.hbasewirteimpl;

import cn.wsj.services.Write;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class UsersImpl implements Write {
    private Connection con;
    public UsersImpl(Connection con){
        this.con = con;
    }
    @Override
    public void writeHBase(ConsumerRecords<String, String> records) {
        // try-with-resources closes the Table again after every batch
        try (Table table = con.getTable(TableName.valueOf("inters:users"))) {
            List<Put> data = new ArrayList<>();
            for (ConsumerRecord<String, String> record : records) {
                String[] info = record.value().split(",", -1);
                // info[0] is the user id; it serves both as the row key and as the userid column
                Put put = new Put(info[0].getBytes());
                put.addColumn("base".getBytes(),"userid".getBytes(),info[0].getBytes());
                put.addColumn("base".getBytes(),"locale".getBytes(),info[1].getBytes());
                put.addColumn("base".getBytes(),"birthyear".getBytes(),info[2].getBytes());
                put.addColumn("base".getBytes(),"gender".getBytes(),info[3].getBytes());
                put.addColumn("base".getBytes(),"joinedAt".getBytes(),info[4].getBytes());
                put.addColumn("base".getBytes(),"location".getBytes(),info[5].getBytes());
                put.addColumn("base".getBytes(),"timezone".getBytes(),info[6].getBytes());
                data.add(put);
            }
            table.put(data);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
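
As the preface mentions, other tables are handled the same way: each target table gets its own Write implementation and shares the same Connection. The skeleton below only illustrates that pattern; the class name EventsImpl, the table inters:events and its column are hypothetical placeholders of my own, not part of the original project.

package cn.wsj.services.hbasewirteimpl;

import cn.wsj.services.Write;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class EventsImpl implements Write {
    private Connection con;
    public EventsImpl(Connection con){
        this.con = con;
    }
    @Override
    public void writeHBase(ConsumerRecords<String, String> records) {
        try (Table table = con.getTable(TableName.valueOf("inters:events"))) {
            List<Put> data = new ArrayList<>();
            for (ConsumerRecord<String, String> record : records) {
                String[] info = record.value().split(",", -1);
                Put put = new Put(info[0].getBytes());
                // map the remaining fields to whatever columns the target table uses
                put.addColumn("base".getBytes(), "eventid".getBytes(), info[0].getBytes());
                data.add(put);
            }
            table.put(data);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}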


5. Running the Application

5.1 NormalFactory

  • NormalFactory: wires the classes together to complete the read/write flow;
package cn.wsj.services.app;

import cn.wsj.services.Read;
import cn.wsj.services.Write;
import cn.wsj.services.common.HBaseUtils;
import cn.wsj.services.conf.HBaseConf;
import cn.wsj.services.hbasewirteimpl.UsersImpl;
import cn.wsj.services.kafkareadutil.KafkaReadImpl;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.util.Properties;

public class NormalFactory {
    public static void run(String topic) throws Exception {
        // obtain the Configuration via HBaseConf.getConf(), then the Connection via HBaseUtils.getCon()
        Connection con = HBaseUtils.getCon(HBaseConf.getConf());
        Write write = null;
        // other HBase tables also need to be written to, so a switch/case picks the matching Write implementation
        switch (topic){
            // when the topic is "users", instantiate UsersImpl; write is now ready to be used below
            case "users":write=new UsersImpl(con);break;
            default:
                throw new Exception("NotFoundTopic");
        }
        // besides reading the Kafka messages, the reader calls write.writeHBase() directly, so the records end up in HBase
        Read read = new KafkaReadImpl(write);
        Properties prop = new Properties();
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"sole:9092");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG,"wsj");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getTypeName());
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,StringDeserializer.class.getTypeName());
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
        read.readKafka(prop,topic);
    }
}
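
If another Write implementation is added (such as the hypothetical EventsImpl sketched in section 4.3), only the switch needs to grow, for example:

        switch (topic){
            case "users": write = new UsersImpl(con); break;
            // hypothetical additional table
            case "events": write = new EventsImpl(con); break;
            default:
                throw new Exception("NotFoundTopic");
        }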

5.2 App

  • App: the main method that starts the job;
package cn.wsj.services.app;

public class App {
    public static void main(String[] args) throws Exception {
        NormalFactory.run("users");
    }
}

Test

  • Log data was first shipped into Kafka via Flume; as shown in the figure below, there are 38,209 records in total. (If Flume is not available, a small test producer can push sample lines instead; see the sketch after the figure.)

[Figure: Flume-to-Kafka ingestion, 38,209 records]
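
A minimal test producer is sketched below. It is my own addition, not part of the original post; the broker address and topic match the consumer configuration above, and the sample line follows the userid,locale,birthyear,gender,joinedAt,location,timezone layout expected by UsersImpl.

package cn.wsj.services.app;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;

public class TestProducer {
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "sole:9092");
        prop.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getTypeName());
        prop.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getTypeName());
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(prop)) {
            // one CSV line in the userid,locale,birthyear,gender,joinedAt,location,timezone layout
            producer.send(new ProducerRecord<>("users",
                    "1001,en_US,1990,male,2023-01-01T00:00:00,Shanghai,480"));
        }
    }
}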


  • Before running the program, create the target table and column family in HBase first:
# enter the HBase shell
[root@sole ~]# hbase shell
# create the table and column family
hbase(main):001:0> create 'inters:users','base'

  • After the program has run, verify that the row count of the HBase table matches the number of log records in the Kafka topic. As the figure below shows, the map side read 38,209 records and the table contains 38,209 rows, so the read/write pipeline completed successfully.
[root@sole ~]# hbase org.apache.hadoop.hbase.mapreduce.RowCounter 'inters:users'

[Figure: RowCounter output, 38,209 rows]


PS: If anything here is wrong or could be written better, please leave your comments and suggestions below. And if this post helped you, a like would be much appreciated!

Original author: wsjslient

Author's homepage: https://blog.csdn.net/wsjslient

