Kafka 3: Uploading Kafka Data to HBase
1: Create the tables in HBase
- Create the namespace
create_namespace 'events_db'
- List the namespaces
list_namespace
- Create a table in that namespace with column family euot
create 'events_db:event_attendees','euot'
- Create a table in that namespace with column family of
create 'events_db:user_friends','of'
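The same setup can also be scripted from Java with the Admin API instead of the shell. A minimal sketch, assuming an HBase 2.x client; the class name is illustrative and not part of the original project:

package hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;

// Hypothetical helper: creates the namespace and both tables used below
public class CreateEventsTables {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.101.130");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            admin.createNamespace(NamespaceDescriptor.create("events_db").build());
            admin.createTable(TableDescriptorBuilder
                    .newBuilder(TableName.valueOf("events_db:event_attendees"))
                    .setColumnFamily(ColumnFamilyDescriptorBuilder.of("euot"))
                    .build());
            admin.createTable(TableDescriptorBuilder
                    .newBuilder(TableName.valueOf("events_db:user_friends"))
                    .setColumnFamily(ColumnFamilyDescriptorBuilder.of("of"))
                    .build());
        }
    }
}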
2: Upload to HBase
- Upload the cleaned topic data from Kafka into HBase
Example 1
package hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Properties;
/**
 * @Description
 * Import the event_attendees_raw data from the Kafka topic into the HBase events_db:event_attendees table
 * @Author: WuYou on @date 2020/5/27
 */
public class EventAttendsHB {
    public static void main(String[] args) {
        // Kafka consumer configuration: auto commit is disabled, offsets are committed
        // manually after each successful HBase write
        Properties prop = new Properties();
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.101.130:9092");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000");
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG, "eventattendees3");
        final KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
        consumer.subscribe(Collections.singleton("event_attendees1"));

        // basic HBase configuration
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.101.130");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("hbase.rootdir", "hdfs://192.168.101.130:9000/hbase");
        try {
            final Connection connection = ConnectionFactory.createConnection(conf);
            final Table table = connection.getTable(TableName.valueOf("events_db:event_attendees"));
            while (true) {
                ArrayList<Put> datas = new ArrayList<>();
                final ConsumerRecords<String, String> records = consumer.poll(100);
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println(record.value());
                    final String[] info = record.value().split(",");
                    // rowkey: hash of the three fields; one Put per record
                    final Put put = new Put(Bytes.toBytes((info[0] + info[1] + info[2]).hashCode()));
                    put.addColumn("euot".getBytes(), "eventid".getBytes(), info[0].getBytes());
                    put.addColumn("euot".getBytes(), "friendid".getBytes(), info[1].getBytes());
                    put.addColumn("euot".getBytes(), "stat".getBytes(), info[2].getBytes());
                    datas.add(put);
                }
                table.put(datas);
                // auto commit is off, so commit offsets only after the batch is written
                consumer.commitAsync();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
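For higher write throughput, the per-batch table.put(datas) call above could be replaced with a BufferedMutator, which buffers Puts client-side and flushes them in batches. A sketch of just the write loop, reusing connection and consumer from the example and requiring an extra import of org.apache.hadoop.hbase.client.BufferedMutator; this is an alternative, not the original author's code:

try (BufferedMutator mutator = connection.getBufferedMutator(
        TableName.valueOf("events_db:event_attendees"))) {
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(100);
        for (ConsumerRecord<String, String> record : records) {
            String[] info = record.value().split(",");
            Put put = new Put(Bytes.toBytes((info[0] + info[1] + info[2]).hashCode()));
            put.addColumn("euot".getBytes(), "eventid".getBytes(), info[0].getBytes());
            put.addColumn("euot".getBytes(), "friendid".getBytes(), info[1].getBytes());
            put.addColumn("euot".getBytes(), "stat".getBytes(), info[2].getBytes());
            mutator.mutate(put);   // buffered client-side
        }
        mutator.flush();           // ensure the batch is written before committing offsets
        consumer.commitAsync();
    }
}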
Example 2
package hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
public class UserFriendsHB {
    public static void main(String[] args) {
        // Kafka consumer configuration: auto commit is disabled, offsets are committed
        // manually after each successful HBase write
        Properties prop = new Properties();
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.101.130:9092");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000");
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG, "userfriends2");
        final KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
        consumer.subscribe(Collections.singleton("user_friends1"));

        // basic HBase configuration (HBaseConfiguration.create() replaces the deprecated constructor)
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.101.130");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("hbase.rootdir", "hdfs://192.168.101.130:9000/hbase");
        try {
            final Connection connection = ConnectionFactory.createConnection(conf);
            final Table table = connection.getTable(TableName.valueOf("events_db:user_friends"));
            while (true) {
                final ConsumerRecords<String, String> records = consumer.poll(100);
                List<Put> datas = new ArrayList<>();
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println(record.value());
                    final String[] infos = record.value().split(",");
                    // rowkey: hash of userid + friendid
                    final Put put = new Put(Bytes.toBytes((infos[0] + infos[1]).hashCode()));
                    put.addColumn("of".getBytes(), "userid".getBytes(), infos[0].getBytes());
                    put.addColumn("of".getBytes(), "friendid".getBytes(), infos[1].getBytes());
                    datas.add(put);
                }
                table.put(datas);
                // auto commit is off, so commit offsets only after the batch is written
                consumer.commitAsync();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
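One caveat that applies to both examples: using String.hashCode() as the rowkey means two different id combinations can, in rare cases, hash to the same int and silently overwrite each other. If that matters for your data, a digest-based rowkey is safer. A hedged alternative using HBase's own MD5Hash utility (the "_" separator is an illustrative choice):

import org.apache.hadoop.hbase.util.MD5Hash;
// ...inside the record loop, instead of the hashCode-based rowkey:
String rowkey = MD5Hash.getMD5AsHex(Bytes.toBytes(infos[0] + "_" + infos[1]));
Put put = new Put(Bytes.toBytes(rowkey));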
3: Verify the upload to HBase
- Slow: count the rows in the hbase shell
hbase(main):015:0> count 'events_db:event_attendees', INTERVAL => 100000
- Fast: use the RowCounter MapReduce job (note: this is not run inside the hbase shell; run it from an ordinary shell prompt)
[root@cai bin]# hbase org.apache.hadoop.hbase.mapreduce.RowCounter 'events_db:event_attendees'
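Beyond counting, a quick client-side scan can confirm the actual cell contents. A minimal sketch, assuming an HBase client recent enough to have Scan.setLimit (1.4+/2.x) and reusing the connection setup from the examples above; it also needs imports of org.apache.hadoop.hbase.client.Scan, Result, and ResultScanner:

Scan scan = new Scan();
scan.setLimit(5); // fetch only a handful of rows
try (Table table = connection.getTable(TableName.valueOf("events_db:event_attendees"));
     ResultScanner scanner = table.getScanner(scan)) {
    for (Result result : scanner) {
        System.out.println(result); // prints the rowkey plus the euot:* cells
    }
}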
4: Notes
- These jobs sometimes fail because the VM was provisioned too small; check disk usage with df -h (df reports disk space, not memory)
[root@cai ~]# df -h
Filesystem Size Used Avail Use% Mounted on
devtmpfs 7.8G 0 7.8G 0% /dev
tmpfs 7.8G 0 7.8G 0% /dev/shm
tmpfs 7.8G 12M 7.8G 1% /run
tmpfs 7.8G 0 7.8G 0% /sys/fs/cgroup
/dev/mapper/centos-root 47G 25G 23G 53% /
/dev/sda1 1014M 150M 865M 15% /boot
tmpfs 1.6G 0 1.6G 0% /run/user/0
If space is short, reallocate the VM's memory and disk size.
Alternatively, attach an additional disk directly to the VM; how to do that is easy to find online and is not covered here.