Directly using the main method
package kafkatohb;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.io.IOException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
/**
 * Consumes the user_friends data from the Kafka topic userfriends
 * into the HBase table events_db:user_friend.
 */
public class UserFriendToHBCopy {
    static int num = 0;

    public static void main(String[] args) {
        // Kafka consumer properties
        final Properties prop = new Properties();
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.232.211:9092");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        // prop.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000");
        // prop.put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, "10000");
        // Offset commit mode: false = manual commit, true = auto-commit.
        // Auto-commit is disabled here, so offsets are committed explicitly
        // after each successful write (see commitAsync below).
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG, "user_friend_group1");
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
        consumer.subscribe(Collections.singleton("userfriends"));

        // Configure the HBase client and connect to the cluster
        Configuration conf = HBaseConfiguration.create();
        conf.set(HConstants.HBASE_DIR, "hdfs://192.168.232.211:9000/hbase");
        conf.set(HConstants.ZOOKEEPER_QUORUM, "192.168.232.211");
        // i.e. "hbase.zookeeper.property.clientPort"; HConstants.CLIENT_PORT_STR
        // is only the bare "clientPort" key, which the client ignores
        conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, "2181");
        try {
            Connection connection = ConnectionFactory.createConnection(conf);
            Table userFriendTable = connection
                    .getTable(TableName.valueOf("events_db:user_friend"));
            while (true) { // keep polling; each poll returns one batch, not the whole topic
                // poll(Duration) requires kafka-clients 2.0+; on older clients use poll(100)
                ConsumerRecords<String, String> poll = consumer.poll(Duration.ofMillis(100));
                List<Put> datas = new ArrayList<>();
                for (ConsumerRecord<String, String> record : poll) {
                    System.out.println(record.value());
                    // e.g. "30386403,30279525" -> userid, friendid
                    String[] split = record.value().split(",");
                    // rowkey: hash of the concatenated ids (collision-prone; a
                    // delimiter such as split[0] + "_" + split[1] would be safer)
                    Put put = new Put(Bytes.toBytes((split[0] + split[1]).hashCode()));
                    put.addColumn("uf".getBytes(), "userid".getBytes(), split[0].getBytes());
                    put.addColumn("uf".getBytes(), "friendid".getBytes(), split[1].getBytes());
                    datas.add(put);
                }
                num += datas.size();
                System.out.println("----------------------------------num: " + num);
                if (!datas.isEmpty()) {
                    userFriendTable.put(datas);
                }
                consumer.commitAsync(); // manual commit, since auto-commit is off
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
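The code above assumes the namespace events_db and the table user_friend (with column family uf) already exist. If they don't, a one-off setup sketch like the following can create them. This class is not part of the original project: it assumes the HBase 2.x Admin API, and the class name is made up for this example.
package kafkatohb;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.NamespaceExistException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import java.io.IOException;
public class CreateUserFriendTable {
    // One-off setup: create events_db:user_friend with column family "uf"
    public static void createIfMissing(Connection connection) throws IOException {
        try (Admin admin = connection.getAdmin()) {
            try {
                admin.createNamespace(NamespaceDescriptor.create("events_db").build());
            } catch (NamespaceExistException ignored) {
                // namespace already present
            }
            TableName tn = TableName.valueOf("events_db:user_friend");
            if (!admin.tableExists(tn)) {
                admin.createTable(TableDescriptorBuilder.newBuilder(tn)
                        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("uf"))
                        .build());
            }
        }
    }
}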
Using an interface
Write an interface that takes the records pulled from Kafka as a parameter and returns a list of Put objects. Each Kafka topic assembles its Puts in its own way, so every assembly strategy implements this same interface (a second, hypothetical handler is sketched after UserFriendHandler below).
package kafkatohb;
import org.apache.hadoop.hbase.client.Put;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import java.util.List;
public interface IParseRecord {
    List<Put> parse(ConsumerRecords<String, String> records);
}
package kafkatohb;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import java.util.ArrayList;
import java.util.List;
/**
 * @Author qiaozhengchao
 * @Date 2021/6/1
 * @Description Assembles Put objects for the userfriends topic
 */
public class UserFriendHandler implements IParseRecord {
    @Override
    public List<Put> parse(ConsumerRecords<String, String> records) {
        List<Put> datas = new ArrayList<>();
        for (ConsumerRecord<String, String> record : records) {
            System.out.println(record.value());
            String[] split = record.value().split(",");
            Put put = new Put(Bytes.toBytes((split[0] + split[1]).hashCode()));
            put.addColumn("uf".getBytes(), "userid".getBytes(), split[0].getBytes());
            put.addColumn("uf".getBytes(), "friendid".getBytes(), split[1].getBytes());
            datas.add(put);
        }
        return datas;
    }
}
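To show why the interface pays off, here is a second, hypothetical handler. The topic "events" and its "eventid,userid,starttime" message layout are invented for this sketch; only the Put-assembly logic differs, and everything else in the pipeline is reused unchanged.
package kafkatohb;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import java.util.ArrayList;
import java.util.List;
// Hypothetical handler for an imagined "events" topic whose messages look
// like "eventid,userid,starttime": the first field is already unique, so it
// can serve as the rowkey directly.
public class EventsHandler implements IParseRecord {
    @Override
    public List<Put> parse(ConsumerRecords<String, String> records) {
        List<Put> datas = new ArrayList<>();
        for (ConsumerRecord<String, String> record : records) {
            String[] fields = record.value().split(",");
            Put put = new Put(Bytes.toBytes(fields[0])); // eventid as rowkey
            put.addColumn("creator".getBytes(), "userid".getBytes(), fields[1].getBytes());
            put.addColumn("schedule".getBytes(), "starttime".getBytes(), fields[2].getBytes());
            datas.add(put);
        }
        return datas;
    }
}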
Abstracting the write action
Operating on HBase ultimately comes down to writing data into a table, but each table is written differently, so the write action gets its own interface. It only cares about two inputs: the target table name and the Kafka records.
package kafkatohb.write;
import org.apache.kafka.clients.consumer.ConsumerRecords;
public interface IWriter {
    int write(String tableName, ConsumerRecords<String, String> records);
}
package kafkatohb.write;
import kafkatohb.IParseRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import java.io.IOException;
import java.util.List;
public class HBaseWriter implements IWriter {
    private Connection connection = null;
    // The record handler is injected as a property of HBaseWriter
    private IParseRecord handler = null;

    // The constructor stores the handler and opens the HBase connection
    public HBaseWriter(IParseRecord handler) {
        this.handler = handler;
        // Configure the HBase client and connect to the cluster
        Configuration conf = HBaseConfiguration.create();
        conf.set(HConstants.HBASE_DIR, "hdfs://192.168.232.211:9000/hbase");
        conf.set(HConstants.ZOOKEEPER_QUORUM, "192.168.232.211");
        // i.e. "hbase.zookeeper.property.clientPort"
        conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, "2181");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    @Override
    public int write(String tableName, ConsumerRecords<String, String> records) {
        try {
            Table table = connection.getTable(TableName.valueOf(tableName));
            List<Put> datas = handler.parse(records);
            if (!datas.isEmpty()) {
                table.put(datas);
            }
            table.close();
            return datas.size();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return 0;
    }
}
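A side benefit of IWriter is that HBase can be swapped out entirely, for example with a dry-run writer while testing the Kafka side locally. This class is not in the original project; it is a minimal sketch.
package kafkatohb.write;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
// Hypothetical dry-run writer: prints records instead of writing to HBase,
// useful for verifying the Kafka side of the pipeline in isolation.
public class ConsoleWriter implements IWriter {
    @Override
    public int write(String tableName, ConsumerRecords<String, String> records) {
        int count = 0;
        for (ConsumerRecord<String, String> record : records) {
            System.out.println(tableName + " <- " + record.value());
            count++;
        }
        return count;
    }
}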
Worker
package kafkatohb.write;
public interface IWorker {
    // targetName: the name of the target table to fill
    void fillData(String targetName);
}
package kafkatohb.write;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.time.Duration;
import java.util.Collections;
import java.util.Properties;
public class Worker implements IWorker {
    KafkaConsumer<String, String> consumer;
    IWriter writer = null;

    public Worker(String topicName, String groupId, IWriter writer) {
        this.writer = writer;
        final Properties prop = new Properties();
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.232.211:9092");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        // prop.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000");
        // prop.put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, "10000");
        // Offset commit mode: false = manual commit, true = auto-commit.
        // Auto-commit is disabled; fillData commits after each write.
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        consumer = new KafkaConsumer<>(prop);
        consumer.subscribe(Collections.singleton(topicName)); // e.g. "userfriends"
    }

    @Override
    public void fillData(String targetName) {
        int sum = 0;
        try {
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                int count = writer.write(targetName, records);
                consumer.commitAsync(); // manual commit, since auto-commit is off
                sum += count;
                System.out.println("records processed: " + sum);
                Thread.sleep(50);
            }
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
main
package kafkatohb;
import kafkatohb.write.HBaseWriter;
import kafkatohb.write.IWorker;
import kafkatohb.write.IWriter;
import kafkatohb.write.Worker;
/**
 * Consumes the Kafka topic userfriends into the HBase table
 * events_db:user_friend, wired together through the abstractions above.
 */
public class UserFriendToHB {
    public static void main(String[] args) {
        IParseRecord handlerUserFriend = new UserFriendHandler();
        IWriter hBaseWriter = new HBaseWriter(handlerUserFriend);
        IWorker worker = new Worker("userfriends", "ufgroup23", hBaseWriter);
        worker.fillData("events_db:user_friend");
    }
}
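Adding another topic now only requires a new handler; the Worker and HBaseWriter are reused as-is. Using the hypothetical EventsHandler sketched earlier, a second entry point could look like this (the topic "events", group "events_group1", and table "events_db:events" are all invented names):
package kafkatohb;
import kafkatohb.write.HBaseWriter;
import kafkatohb.write.IWorker;
import kafkatohb.write.IWriter;
import kafkatohb.write.Worker;
// Hypothetical second entry point reusing the same pipeline for another topic
public class EventsToHB {
    public static void main(String[] args) {
        IParseRecord eventsHandler = new EventsHandler();
        IWriter eventsWriter = new HBaseWriter(eventsHandler);
        IWorker eventsWorker = new Worker("events", "events_group1", eventsWriter);
        eventsWorker.fillData("events_db:events");
    }
}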