Preface: Design Approach
a. Reading and writing are different operations, so two interfaces are defined, one for reads and one for writes;
b. For reading Kafka data, the read interface takes two parameters: a Properties object and a topicName;
c. For the write operation, instead of passing an HBase Connection into the interface, we keep it simple and pass in the ConsumerRecords just read from Kafka and write them straight to HBase;
d. The HBase Connection object itself is created by a separate class, and writes are multi-threaded (a thread count is set when the connection is created; those threads live in a thread pool, which avoids the JVM repeatedly creating and tearing down threads);
e. Since I need to write to several different tables, each table's write logic is also its own class, so the right class can simply be chosen at call time;
TIP: it may be easiest to read 5.1 NormalFactory first to see how the pieces fit together; a condensed sketch of the wiring is shown right below.
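As a quick orientation, here is a condensed sketch of the structure described above. It only restates how the classes from sections 2 to 5 connect (the variable names connection and kafkaProps are placeholders, not part of the project code):

// Condensed wiring sketch (full code in sections 2-5 below)
public interface Read  { void readKafka(Properties prop, String topicName) throws IOException; }
public interface Write { void writeHBase(ConsumerRecords<String, String> records); }

// KafkaReadImpl is constructed with a Write, so every batch polled from Kafka is
// handed straight to writeHBase(); UsersImpl is one Write implementation that
// holds a shared, thread-pooled HBase Connection.
Write write = new UsersImpl(connection);   // connection built by HBaseConf + HBaseUtils
Read  read  = new KafkaReadImpl(write);
read.readKafka(kafkaProps, "users");       // poll loop: read -> writeHBase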
Package Structure
The Java package structure is as follows:
cn.wsj.services                 (Read / Write interfaces)
cn.wsj.services.common          (KafkaUtils, HBaseUtils)
cn.wsj.services.conf            (HBaseConf)
cn.wsj.services.kafkareadutil   (KafkaReadImpl)
cn.wsj.services.hbasewirteimpl  (UsersImpl)
cn.wsj.services.app             (NormalFactory, App)
1. pom.xml
- The dependencies used for this project are as follows:
<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka -->
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka_2.11</artifactId>
    <version>2.0.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients -->
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>2.0.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.2.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-common</artifactId>
    <version>1.2.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.2.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.6.0</version>
</dependency>
2. Read/Write Interfaces
2.1 Read
package cn.wsj.services;

import java.io.IOException;
import java.util.Properties;

public interface Read {
    public void readKafka(Properties prop, String topicName) throws IOException;
}
2.2 Write
package cn.wsj.services;

import org.apache.kafka.clients.consumer.ConsumerRecords;

public interface Write {
    public void writeHBase(ConsumerRecords<String, String> records);
}
3. Reading from Kafka
3.1 KafkaUtils
- KafkaUtils: returns a KafkaConsumer instance;
package cn.wsj.services.common;

import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Properties;

public class KafkaUtils {
    public static KafkaConsumer<String, String> getConsumer(Properties prop) {
        return new KafkaConsumer<String, String>(prop);
    }
}
3.2 KafkaReadImpl
- KafkaReadImpl: a Write instance must be passed in when it is constructed, precisely so that data read from Kafka is written straight to HBase;
package cn.wsj.services.kafkareadutil;

import cn.wsj.services.Read;
import cn.wsj.services.Write;
import cn.wsj.services.common.KafkaUtils;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.time.Duration;
import java.util.Arrays;
import java.util.Properties;

public class KafkaReadImpl implements Read {

    private Write write;

    public KafkaReadImpl(Write write) {
        this.write = write;
    }

    @Override
    public void readKafka(Properties prop, String topicName) {
        KafkaConsumer<String, String> consumer = KafkaUtils.getConsumer(prop);
        consumer.subscribe(Arrays.asList(topicName));
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
            write.writeHBase(records);
        }
    }
}
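Note that the loop above relies on the consumer's default auto-commit behaviour (enable.auto.commit defaults to true). If you would rather commit offsets only after the HBase write has succeeded, a minimal variant could look like the sketch below; the enable.auto.commit setting and the placement of commitSync() are my additions, not part of the original code:

// Sketch: manual offset commit after a successful HBase write. Assumes the caller
// set prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false").
while (true) {
    ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
    if (!records.isEmpty()) {
        write.writeHBase(records);   // write the batch first
        consumer.commitSync();       // then mark these offsets as processed
    }
}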
4. Writing to HBase
4.1 HBaseConf
- HBaseConf: responsible for creating the Configuration object;
package cn.wsj.services.conf;

import org.apache.hadoop.conf.Configuration;

public class HBaseConf {
    public static Configuration getConf() {
        Configuration config = new Configuration();
        config.set("hbase.zookeeper.quorum", "sole");
        return config;
    }
}
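A bare Hadoop Configuration appears to work here because the HBase client falls back to built-in defaults (ZooKeeper client port 2181, parent znode /hbase, and so on), but the more common pattern is to start from HBaseConfiguration.create(), which also picks up any hbase-site.xml on the classpath. A possible variant, assuming the same quorum host:

// Sketch of an alternative getConf(), using HBaseConfiguration so that values from
// hbase-default.xml / hbase-site.xml on the classpath are loaded before overriding the quorum.
// Requires: import org.apache.hadoop.hbase.HBaseConfiguration;
public static Configuration getConf() {
    Configuration config = HBaseConfiguration.create();
    config.set("hbase.zookeeper.quorum", "sole");
    return config;
}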
4.2 HBaseUtils
- HBaseUtils: responsible for instantiating the HBase Connection object;
package cn.wsj.services.common;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class HBaseUtils {
    public static Connection getCon(Configuration config) throws IOException {
        // the connection uses this pool to run batch operations in parallel
        ExecutorService es = Executors.newFixedThreadPool(10);
        Connection con = ConnectionFactory.createConnection(config, es);
        return con;
    }
}
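One thing this demo never does is close the connection, since the poll loop runs forever. If you do add an orderly shutdown path, keep a handle to the pool as well: to my knowledge an externally supplied executor is not shut down by Connection.close() itself. A hedged sketch of how HBaseUtils could be restructured for that (holding the pool as a field is my own change, not in the original):

// Sketch: keep the pool as a field so it can be shut down together with the connection.
private static final ExecutorService ES = Executors.newFixedThreadPool(10);

public static Connection getCon(Configuration config) throws IOException {
    return ConnectionFactory.createConnection(config, ES);
}

public static void close(Connection con) throws IOException {
    con.close();     // close the HBase connection first
    ES.shutdown();   // then stop the worker threads we supplied
}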
4.3 UsersImpl
- UsersImpl: targets a specific HBase table; the ConsumerRecords passed in are assembled into a List<Put> and written to the table;
- each Put object is instantiated with its row key (as bytes);
package cn.wsj.services.hbasewirteimpl;

import cn.wsj.services.Write;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class UsersImpl implements Write {

    private Connection con;

    public UsersImpl(Connection con) {
        this.con = con;
    }

    @Override
    public void writeHBase(ConsumerRecords<String, String> records) {
        // try-with-resources so the Table is closed after each batch
        try (Table table = con.getTable(TableName.valueOf("inters:users"))) {
            List<Put> data = new ArrayList<>();
            for (ConsumerRecord<String, String> record : records) {
                String[] info = record.value().split(",", -1);
                Put put = new Put(info[0].getBytes());   // row key: user id
                put.addColumn("base".getBytes(), "userid".getBytes(), info[0].getBytes());
                put.addColumn("base".getBytes(), "locale".getBytes(), info[1].getBytes());
                put.addColumn("base".getBytes(), "birthyear".getBytes(), info[2].getBytes());
                put.addColumn("base".getBytes(), "gender".getBytes(), info[3].getBytes());
                put.addColumn("base".getBytes(), "joinedAt".getBytes(), info[4].getBytes());
                put.addColumn("base".getBytes(), "location".getBytes(), info[5].getBytes());
                put.addColumn("base".getBytes(), "timezone".getBytes(), info[6].getBytes());
                data.add(put);
            }
            table.put(data);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
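For higher write throughput, an alternative approach (not used in the original code) is to keep a BufferedMutator for the table and let the client batch and flush mutations itself. A rough sketch of what the write path could look like under that approach:

// Sketch: the same writes via a BufferedMutator, which buffers Puts client-side
// and flushes them when its buffer fills up.
// Requires: import org.apache.hadoop.hbase.client.BufferedMutator;
try (BufferedMutator mutator = con.getBufferedMutator(TableName.valueOf("inters:users"))) {
    for (ConsumerRecord<String, String> record : records) {
        String[] info = record.value().split(",", -1);
        Put put = new Put(info[0].getBytes());
        put.addColumn("base".getBytes(), "locale".getBytes(), info[1].getBytes());
        // ... remaining columns as in writeHBase() above ...
        mutator.mutate(put);
    }
    mutator.flush();   // make sure everything reaches the RegionServers before returning
} catch (IOException e) {
    e.printStackTrace();
}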
5. Running the Application
5.1 NormalFactory
- NormalFactory: assembles the classes into the complete read/write flow;
package cn.wsj.services.app;

import cn.wsj.services.Read;
import cn.wsj.services.Write;
import cn.wsj.services.common.HBaseUtils;
import cn.wsj.services.conf.HBaseConf;
import cn.wsj.services.hbasewirteimpl.UsersImpl;
import cn.wsj.services.kafkareadutil.KafkaReadImpl;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.util.Properties;

public class NormalFactory {

    public static void run(String topic) throws Exception {
        // get the Configuration from HBaseConf.getConf(),
        // then get the Connection from HBaseUtils.getCon()
        Connection con = HBaseUtils.getCon(HBaseConf.getConf());
        Write write = null;
        // I still have other HBase tables to write to, so a switch makes dispatching easy
        switch (topic) {
            // if the topic is "users", instantiate UsersImpl; write is now ready to be used below
            case "users":
                write = new UsersImpl(con);
                break;
            default:
                throw new Exception("NotFoundTopic");
        }
        // besides reading Kafka messages, this also calls write's writeHBase() to push the data into HBase
        Read read = new KafkaReadImpl(write);
        Properties prop = new Properties();
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "sole:9092");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG, "wsj");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getTypeName());
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getTypeName());
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        read.readKafka(prop, topic);
    }
}
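As the comment in the switch suggests, supporting another table only means adding another Write implementation and another case. For example (EventsImpl and the "events" topic are hypothetical names, not part of this project):

// Hypothetical extension: dispatch a second topic to its own Write implementation.
switch (topic) {
    case "users":  write = new UsersImpl(con);  break;
    case "events": write = new EventsImpl(con); break;   // EventsImpl would mirror UsersImpl
    default:       throw new Exception("NotFoundTopic");
}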
5.2 App
- App: the main method starts the job;
package cn.wsj.services.app;

public class App {
    public static void main(String[] args) throws Exception {
        NormalFactory.run("users");
    }
}
Test
- First, I used Flume to send the log data to Kafka; as shown in the figure, there are 38209 records in total.
- Before running the program, the corresponding table and column family must first be created in HBase:
# enter the HBase shell
[root@sole ~]# hbase shell
# create the table and column family
hbase(main):001:0> create 'inters:users','base'
- After the program has run, verify that the row count of the HBase table matches the number of log records in the Kafka topic. As shown in the figure, the map side read in 38209 records and the table contains 38209 rows, so the read/write pipeline is complete!
[root@sole ~]# hbase org.apache.hadoop.hbase.mapreduce.RowCounter 'inters:users'
PS: if anything here is wrong or could be written better, please leave your valuable comments or suggestions in the comment section. If this post helped you, I would really appreciate a like. Many thanks!
Original author: wsjslient